diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4c3dc63afd878d..3fd5211e4f09f2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
     // (select c, -1, y) -> -c | y
     if (isAllOnesConstant(TrueV)) {
       SDValue Neg = DAG.getNegative(CondV, DL, VT);
-      return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
     }
     // (select c, y, -1) -> (c-1) | y
     if (isAllOnesConstant(FalseV)) {
       SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                 DAG.getAllOnesConstant(DL, VT));
-      return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
     }
 
     // (select c, 0, y) -> (c-1) & y
     if (isNullConstant(TrueV)) {
       SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                 DAG.getAllOnesConstant(DL, VT));
-      return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
     }
     // (select c, y, 0) -> -c & y
     if (isNullConstant(FalseV)) {
       SDValue Neg = DAG.getNegative(CondV, DL, VT);
-      return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
     }
   }
 
@@ -7290,13 +7290,13 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
     // (select !x, x, y) -> x & y
     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
       return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
-                         FalseV);
+                         DAG.getFreeze(FalseV));
     }
     // (select x, y, x) -> x & y
     // (select !x, y, x) -> x | y
     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
-      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
-                         FalseV);
+      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
+                         DAG.getFreeze(TrueV), FalseV);
     }
   }
 
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
index f032756e007b68..e16f6abcca244c 100644
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind {
 ; RV32I-LABEL: sltiu:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    sltiu a0, a0, 3
-; RV32I-NEXT:    seqz a1, a1
+; RV32I-NEXT:    snez a1, a1
+; RV32I-NEXT:    addi a1, a1, -1
 ; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index aa962d68fc5285..5914e45a153302 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; RV32IA-NEXT:    # =>This Loop Header: Depth=1
 ; RV32IA-NEXT:    # Child Loop BB2_3 Depth 2
 ; RV32IA-NEXT:    mv a3, a2
-; RV32IA-NEXT:    addi a2, a2, 1
-; RV32IA-NEXT:    sltu a4, a3, a1
-; RV32IA-NEXT:    neg a4, a4
-; RV32IA-NEXT:    and a4, a4, a2
+; RV32IA-NEXT:    addi a4, a2, 1
+; RV32IA-NEXT:    sltu a2, a2, a1
+; RV32IA-NEXT:    neg a2, a2
+; RV32IA-NEXT:    and a4, a2, a4
 ; RV32IA-NEXT:  .LBB2_3: # %atomicrmw.start
 ; RV32IA-NEXT:    # Parent Loop BB2_1 Depth=1
 ; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
@@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; RV64IA-NEXT:    # =>This Loop Header: Depth=1
 ; RV64IA-NEXT:    # Child Loop BB3_3 Depth 2
 ; RV64IA-NEXT:    mv a3, a2
-; RV64IA-NEXT:    addi a2, a2, 1
-; RV64IA-NEXT:    sltu a4, a3, a1
-; RV64IA-NEXT:    neg a4, a4
-; RV64IA-NEXT:    and a4, a4, a2
+; RV64IA-NEXT:    addi a4, a2, 1
+; RV64IA-NEXT:    sltu a2, a2, a1
+; RV64IA-NEXT:    neg a2, a2
+; RV64IA-NEXT:    and a4, a2, a4
 ; RV64IA-NEXT:  .LBB3_3: # %atomicrmw.start
 ; RV64IA-NEXT:    # Parent Loop BB3_1 Depth=1
 ; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index d533607ad54e38..0216d00be21854 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
 define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
 ; RV32IZFBFMIN:       # %bb.0: # %start
-; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
-; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFBFMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    addi sp, sp, -32
+; RV32IZFBFMIN-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV32IZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a0)
 ; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fs0, fa0
+; RV32IZFBFMIN-NEXT:    flt.s s0, fa5, fs0
+; RV32IZFBFMIN-NEXT:    neg s1, s0
 ; RV32IZFBFMIN-NEXT:    lui a0, 913408
 ; RV32IZFBFMIN-NEXT:    fmv.w.x fa5, a0
-; RV32IZFBFMIN-NEXT:    fle.s s0, fa5, fs0
+; RV32IZFBFMIN-NEXT:    fle.s s2, fa5, fs0
+; RV32IZFBFMIN-NEXT:    neg s3, s2
 ; RV32IZFBFMIN-NEXT:    fmv.s fa0, fs0
 ; RV32IZFBFMIN-NEXT:    call __fixsfdi
+; RV32IZFBFMIN-NEXT:    and a0, s3, a0
+; RV32IZFBFMIN-NEXT:    or a0, s1, a0
+; RV32IZFBFMIN-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFBFMIN-NEXT:    neg a2, a2
 ; RV32IZFBFMIN-NEXT:    lui a4, 524288
-; RV32IZFBFMIN-NEXT:    lui a2, 524288
-; RV32IZFBFMIN-NEXT:    beqz s0, .LBB10_2
+; RV32IZFBFMIN-NEXT:    li a5, 1
+; RV32IZFBFMIN-NEXT:    lui a3, 524288
+; RV32IZFBFMIN-NEXT:    bne s2, a5, .LBB10_2
 ; RV32IZFBFMIN-NEXT:  # %bb.1: # %start
-; RV32IZFBFMIN-NEXT:    mv a2, a1
+; RV32IZFBFMIN-NEXT:    mv a3, a1
 ; RV32IZFBFMIN-NEXT:  .LBB10_2: # %start
-; RV32IZFBFMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32IZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IZFBFMIN-NEXT:    flt.s a3, fa5, fs0
-; RV32IZFBFMIN-NEXT:    beqz a3, .LBB10_4
+; RV32IZFBFMIN-NEXT:    and a0, a2, a0
+; RV32IZFBFMIN-NEXT:    beqz s0, .LBB10_4
 ; RV32IZFBFMIN-NEXT:  # %bb.3:
-; RV32IZFBFMIN-NEXT:    addi a2, a4, -1
+; RV32IZFBFMIN-NEXT:    addi a3, a4, -1
 ; RV32IZFBFMIN-NEXT:  .LBB10_4: # %start
-; RV32IZFBFMIN-NEXT:    feq.s a1, fs0, fs0
-; RV32IZFBFMIN-NEXT:    neg a4, a1
-; RV32IZFBFMIN-NEXT:    and a1, a4, a2
-; RV32IZFBFMIN-NEXT:    neg a2, a3
-; RV32IZFBFMIN-NEXT:    neg a3, s0
-; RV32IZFBFMIN-NEXT:    and a0, a3, a0
-; RV32IZFBFMIN-NEXT:    or a0, a2, a0
-; RV32IZFBFMIN-NEXT:    and a0, a4, a0
-; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    and a1, a2, a3
+; RV32IZFBFMIN-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT:    addi sp, sp, 32
 ; RV32IZFBFMIN-NEXT:    ret
 ;
 ; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
 ; R32IDZFBFMIN:       # %bb.0: # %start
-; R32IDZFBFMIN-NEXT:    addi sp, sp, -16
-; R32IDZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; R32IDZFBFMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    addi sp, sp, -32
+; R32IDZFBFMIN-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; R32IDZFBFMIN-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; R32IDZFBFMIN-NEXT:    lui a0, %hi(.LCPI10_0)
+; R32IDZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a0)
 ; R32IDZFBFMIN-NEXT:    fcvt.s.bf16 fs0, fa0
+; R32IDZFBFMIN-NEXT:    flt.s s0, fa5, fs0
+; R32IDZFBFMIN-NEXT:    neg s1, s0
 ; R32IDZFBFMIN-NEXT:    lui a0, 913408
 ; R32IDZFBFMIN-NEXT:    fmv.w.x fa5, a0
-; R32IDZFBFMIN-NEXT:    fle.s s0, fa5, fs0
+; R32IDZFBFMIN-NEXT:    fle.s s2, fa5, fs0
+; R32IDZFBFMIN-NEXT:    neg s3, s2
 ; R32IDZFBFMIN-NEXT:    fmv.s fa0, fs0
 ; R32IDZFBFMIN-NEXT:    call __fixsfdi
+; R32IDZFBFMIN-NEXT:    and a0, s3, a0
+; R32IDZFBFMIN-NEXT:    or a0, s1, a0
+; R32IDZFBFMIN-NEXT:    feq.s a2, fs0, fs0
+; R32IDZFBFMIN-NEXT:    neg a2, a2
 ; R32IDZFBFMIN-NEXT:    lui a4, 524288
-; R32IDZFBFMIN-NEXT:    lui a2, 524288
-; R32IDZFBFMIN-NEXT:    beqz s0, .LBB10_2
+; R32IDZFBFMIN-NEXT:    li a5, 1
+; R32IDZFBFMIN-NEXT:    lui a3, 524288
+; R32IDZFBFMIN-NEXT:    bne s2, a5, .LBB10_2
 ; R32IDZFBFMIN-NEXT:  # %bb.1: # %start
-; R32IDZFBFMIN-NEXT:    mv a2, a1
+; R32IDZFBFMIN-NEXT:    mv a3, a1
 ; R32IDZFBFMIN-NEXT:  .LBB10_2: # %start
-; R32IDZFBFMIN-NEXT:    lui a1, %hi(.LCPI10_0)
-; R32IDZFBFMIN-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
-; R32IDZFBFMIN-NEXT:    flt.s a3, fa5, fs0
-; R32IDZFBFMIN-NEXT:    beqz a3, .LBB10_4
+; R32IDZFBFMIN-NEXT:    and a0, a2, a0
+; R32IDZFBFMIN-NEXT:    beqz s0, .LBB10_4
 ; R32IDZFBFMIN-NEXT:  # %bb.3:
-; R32IDZFBFMIN-NEXT:    addi a2, a4, -1
+; R32IDZFBFMIN-NEXT:    addi a3, a4, -1
 ; R32IDZFBFMIN-NEXT:  .LBB10_4: # %start
-; R32IDZFBFMIN-NEXT:    feq.s a1, fs0, fs0
-; R32IDZFBFMIN-NEXT:    neg a4, a1
-; R32IDZFBFMIN-NEXT:    and a1, a4, a2
-; R32IDZFBFMIN-NEXT:    neg a2, a3
-; R32IDZFBFMIN-NEXT:    neg a3, s0
-; R32IDZFBFMIN-NEXT:    and a0, a3, a0
-; R32IDZFBFMIN-NEXT:    or a0, a2, a0
-; R32IDZFBFMIN-NEXT:    and a0, a4, a0
-; R32IDZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; R32IDZFBFMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    and a1, a2, a3
+; R32IDZFBFMIN-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; R32IDZFBFMIN-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; R32IDZFBFMIN-NEXT:    addi sp, sp, 16
+; R32IDZFBFMIN-NEXT:    addi sp, sp, 32
 ; R32IDZFBFMIN-NEXT:    ret
 ;
 ; RV32ID-LABEL: fcvt_l_bf16_sat:
 ; RV32ID:       # %bb.0: # %start
-; RV32ID-NEXT:    addi sp, sp, -16
-; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    addi sp, sp, -32
+; RV32ID-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32ID-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV32ID-NEXT:    flw fa5, %lo(.LCPI10_0)(a0)
 ; RV32ID-NEXT:    fmv.x.w a0, fa0
 ; RV32ID-NEXT:    slli a0, a0, 16
 ; RV32ID-NEXT:    fmv.w.x fs0, a0
+; RV32ID-NEXT:    flt.s s0, fa5, fs0
+; RV32ID-NEXT:    neg s1, s0
 ; RV32ID-NEXT:    lui a0, 913408
 ; RV32ID-NEXT:    fmv.w.x fa5, a0
-; RV32ID-NEXT:    fle.s s0, fa5, fs0
+; RV32ID-NEXT:    fle.s s2, fa5, fs0
+; RV32ID-NEXT:    neg s3, s2
 ; RV32ID-NEXT:    fmv.s fa0, fs0
 ; RV32ID-NEXT:    call __fixsfdi
+; RV32ID-NEXT:    and a0, s3, a0
+; RV32ID-NEXT:    or a0, s1, a0
+; RV32ID-NEXT:    feq.s a2, fs0, fs0
+; RV32ID-NEXT:    neg a2, a2
 ; RV32ID-NEXT:    lui a4, 524288
-; RV32ID-NEXT:    lui a2, 524288
-; RV32ID-NEXT:    beqz s0, .LBB10_2
+; RV32ID-NEXT:    li a5, 1
+; RV32ID-NEXT:    lui a3, 524288
+; RV32ID-NEXT:    bne s2, a5, .LBB10_2
 ; RV32ID-NEXT:  # %bb.1: # %start
-; RV32ID-NEXT:    mv a2, a1
+; RV32ID-NEXT:    mv a3, a1
 ; RV32ID-NEXT:  .LBB10_2: # %start
-; RV32ID-NEXT:    lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT:    flw fa5, %lo(.LCPI10_0)(a1)
-; RV32ID-NEXT:    flt.s a3, fa5, fs0
-; RV32ID-NEXT:    beqz a3, .LBB10_4
+; RV32ID-NEXT:    and a0, a2, a0
+; RV32ID-NEXT:    beqz s0, .LBB10_4
 ; RV32ID-NEXT:  # %bb.3:
-; RV32ID-NEXT:    addi a2, a4, -1
+; RV32ID-NEXT:    addi a3, a4, -1
 ; RV32ID-NEXT:  .LBB10_4: # %start
-; RV32ID-NEXT:    feq.s a1, fs0, fs0
-; RV32ID-NEXT:    neg a4, a1
-; RV32ID-NEXT:    and a1, a4, a2
-; RV32ID-NEXT:    neg a2, a3
-; RV32ID-NEXT:    neg a3, s0
-; RV32ID-NEXT:    and a0, a3, a0
-; RV32ID-NEXT:    or a0, a2, a0
-; RV32ID-NEXT:    and a0, a4, a0
-; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ID-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    and a1, a2, a3
+; RV32ID-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32ID-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    addi sp, sp, 32
 ; RV32ID-NEXT:    ret
 ;
 ; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
@@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
 ; CHECK32ZFBFMIN-NEXT:    neg s0, a0
 ; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa5, zero
 ; CHECK32ZFBFMIN-NEXT:    fle.s a0, fa5, fa0
-; CHECK32ZFBFMIN-NEXT:    neg s1, a0
+; CHECK32ZFBFMIN-NEXT:    xori a0, a0, 1
+; CHECK32ZFBFMIN-NEXT:    addi s1, a0, -1
 ; CHECK32ZFBFMIN-NEXT:    call __fixunssfdi
 ; CHECK32ZFBFMIN-NEXT:    and a0, s1, a0
 ; CHECK32ZFBFMIN-NEXT:    or a0, s0, a0
@@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
 ; RV32ID-NEXT:    neg s0, a0
 ; RV32ID-NEXT:    fmv.w.x fa5, zero
 ; RV32ID-NEXT:    fle.s a0, fa5, fa0
-; RV32ID-NEXT:    neg s1, a0
+; RV32ID-NEXT:    xori a0, a0, 1
+; RV32ID-NEXT:    addi s1, a0, -1
 ; RV32ID-NEXT:    call __fixunssfdi
 ; RV32ID-NEXT:    and a0, s1, a0
 ; RV32ID-NEXT:    or a0, s0, a0
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index eb8ffe75ef7697..f2e37f55521bac 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -749,40 +749,47 @@ define i64 @fcvt_l_d(double %a) nounwind {
 define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32IFD-LABEL: fcvt_l_d_sat:
 ; RV32IFD:       # %bb.0: # %start
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI12_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI12_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI12_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
 ; RV32IFD-NEXT:    lui a4, 524288
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    beqz s0, .LBB12_2
+; RV32IFD-NEXT:    li a5, 1
+; RV32IFD-NEXT:    lui a3, 524288
+; RV32IFD-NEXT:    bne s2, a5, .LBB12_2
 ; RV32IFD-NEXT:  # %bb.1: # %start
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB12_2: # %start
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI12_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI12_1)(a1)
-; RV32IFD-NEXT:    flt.d a3, fa5, fs0
-; RV32IFD-NEXT:    beqz a3, .LBB12_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB12_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a4, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB12_4: # %start
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a4, a1
-; RV32IFD-NEXT:    and a1, a4, a2
-; RV32IFD-NEXT:    neg a2, a3
-; RV32IFD-NEXT:    neg a3, s0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fcvt_l_d_sat:
@@ -800,40 +807,45 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp)
-; RV32IZFINXZDINX-NEXT:    lw s1, 12(sp)
-; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT:    lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp)
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI12_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI12_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI12_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT:    fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg s3, s2
+; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI12_1)
+; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI12_1+4)(a2)
+; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI12_1)(a2)
+; RV32IZFINXZDINX-NEXT:    and a0, s3, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
 ; RV32IZFINXZDINX-NEXT:    lui a5, 524288
-; RV32IZFINXZDINX-NEXT:    lui a3, 524288
-; RV32IZFINXZDINX-NEXT:    beqz a2, .LBB12_2
+; RV32IZFINXZDINX-NEXT:    li a6, 1
+; RV32IZFINXZDINX-NEXT:    lui a4, 524288
+; RV32IZFINXZDINX-NEXT:    bne s2, a6, .LBB12_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1: # %start
-; RV32IZFINXZDINX-NEXT:    mv a3, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB12_2: # %start
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI12_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI12_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI12_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT:    beqz a4, .LBB12_4
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB12_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB12_4: # %start
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT:    neg a5, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a5, a3
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a4
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a5, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -1013,23 +1025,23 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV32IFD-NEXT:    addi sp, sp, -16
 ; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32IFD-NEXT:    fld fa5, %lo(.LCPI14_0)(a0)
+; RV32IFD-NEXT:    flt.d a0, fa5, fa0
+; RV32IFD-NEXT:    neg s0, a0
 ; RV32IFD-NEXT:    fcvt.d.w fa5, zero
 ; RV32IFD-NEXT:    fle.d a0, fa5, fa0
-; RV32IFD-NEXT:    neg s0, a0
+; RV32IFD-NEXT:    xori a0, a0, 1
+; RV32IFD-NEXT:    addi s1, a0, -1
 ; RV32IFD-NEXT:    call __fixunsdfdi
-; RV32IFD-NEXT:    lui a2, %hi(.LCPI14_0)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI14_0)(a2)
-; RV32IFD-NEXT:    and a0, s0, a0
-; RV32IFD-NEXT:    flt.d a2, fa5, fs0
-; RV32IFD-NEXT:    neg a2, a2
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a1, s0, a1
-; RV32IFD-NEXT:    or a1, a2, a1
+; RV32IFD-NEXT:    and a0, s1, a0
+; RV32IFD-NEXT:    or a0, s0, a0
+; RV32IFD-NEXT:    and a1, s1, a1
+; RV32IFD-NEXT:    or a1, s0, a1
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
@@ -1054,11 +1066,12 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV32IZFINXZDINX-NEXT:    lw s1, 12(sp)
 ; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
 ; RV32IZFINXZDINX-NEXT:    fcvt.d.w a2, zero
-; RV32IZFINXZDINX-NEXT:    lui a4, %hi(.LCPI14_0)
-; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI14_0+4)(a4)
-; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI14_0)(a4)
 ; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
+; RV32IZFINXZDINX-NEXT:    lui a3, %hi(.LCPI14_0)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI14_0)(a3)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI14_0+4)(a3)
+; RV32IZFINXZDINX-NEXT:    xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT:    addi a2, a2, -1
 ; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
 ; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
 ; RV32IZFINXZDINX-NEXT:    neg a3, a3
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
index b8c6e84502408f..ff2d8e00630071 100644
--- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -50,41 +50,48 @@ define signext i32 @test_floor_si32(double %x) {
 define i64 @test_floor_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_floor_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call floor
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI1_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
 ; RV32IFD-NEXT:    lui a4, 524288
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    beqz s0, .LBB1_2
+; RV32IFD-NEXT:    li a5, 1
+; RV32IFD-NEXT:    lui a3, 524288
+; RV32IFD-NEXT:    bne s2, a5, .LBB1_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB1_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI1_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI1_1)(a1)
-; RV32IFD-NEXT:    flt.d a3, fa5, fs0
-; RV32IFD-NEXT:    beqz a3, .LBB1_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB1_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a4, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB1_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a4, a1
-; RV32IFD-NEXT:    and a1, a4, a2
-; RV32IFD-NEXT:    neg a2, a3
-; RV32IFD-NEXT:    neg a3, s0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_floor_si64:
@@ -101,44 +108,47 @@ define i64 @test_floor_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, -32
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call floor
-; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT:    lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT:    sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT:    lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp)
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI1_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI1_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI1_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT:    fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg s3, s2
 ; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI1_1)
+; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI1_1+4)(a2)
+; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI1_1)(a2)
+; RV32IZFINXZDINX-NEXT:    and a0, s3, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
+; RV32IZFINXZDINX-NEXT:    lui a5, 524288
+; RV32IZFINXZDINX-NEXT:    li a6, 1
 ; RV32IZFINXZDINX-NEXT:    lui a4, 524288
-; RV32IZFINXZDINX-NEXT:    lui a2, 524288
-; RV32IZFINXZDINX-NEXT:    beqz s0, .LBB1_2
+; RV32IZFINXZDINX-NEXT:    bne s2, a6, .LBB1_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a2, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB1_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI1_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI1_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI1_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
 ; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB1_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB1_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT:    neg a4, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a4, a2
-; RV32IZFINXZDINX-NEXT:    neg a2, s0
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a3
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a4, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -208,7 +218,8 @@ define i64 @test_floor_ui64(double %x) nounwind {
 ; RV32IFD-NEXT:    neg s0, a0
 ; RV32IFD-NEXT:    fcvt.d.w fa5, zero
 ; RV32IFD-NEXT:    fle.d a0, fa5, fa0
-; RV32IFD-NEXT:    neg s1, a0
+; RV32IFD-NEXT:    xori a0, a0, 1
+; RV32IFD-NEXT:    addi s1, a0, -1
 ; RV32IFD-NEXT:    call __fixunsdfdi
 ; RV32IFD-NEXT:    and a0, s1, a0
 ; RV32IFD-NEXT:    or a0, s0, a0
@@ -235,29 +246,28 @@ define i64 @test_floor_ui64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call floor
 ; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
 ; RV32IZFINXZDINX-NEXT:    fcvt.d.w a2, zero
 ; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg s2, a2
-; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI3_0)
-; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI3_0+4)(a2)
-; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI3_0)(a2)
-; RV32IZFINXZDINX-NEXT:    and a0, s2, a0
-; RV32IZFINXZDINX-NEXT:    flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a1, s2, a1
-; RV32IZFINXZDINX-NEXT:    or a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    lui a3, %hi(.LCPI3_0)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI3_0)(a3)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI3_0+4)(a3)
+; RV32IZFINXZDINX-NEXT:    xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT:    addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a3, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a3, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    or a1, a3, a1
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -316,41 +326,48 @@ define signext i32 @test_ceil_si32(double %x) {
 define i64 @test_ceil_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_ceil_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call ceil
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI5_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI5_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
 ; RV32IFD-NEXT:    lui a4, 524288
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    beqz s0, .LBB5_2
+; RV32IFD-NEXT:    li a5, 1
+; RV32IFD-NEXT:    lui a3, 524288
+; RV32IFD-NEXT:    bne s2, a5, .LBB5_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB5_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI5_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI5_1)(a1)
-; RV32IFD-NEXT:    flt.d a3, fa5, fs0
-; RV32IFD-NEXT:    beqz a3, .LBB5_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB5_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a4, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB5_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a4, a1
-; RV32IFD-NEXT:    and a1, a4, a2
-; RV32IFD-NEXT:    neg a2, a3
-; RV32IFD-NEXT:    neg a3, s0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_ceil_si64:
@@ -367,44 +384,47 @@ define i64 @test_ceil_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, -32
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call ceil
-; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT:    lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT:    sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT:    lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp)
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI5_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI5_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI5_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT:    fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg s3, s2
 ; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI5_1)
+; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI5_1+4)(a2)
+; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI5_1)(a2)
+; RV32IZFINXZDINX-NEXT:    and a0, s3, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
+; RV32IZFINXZDINX-NEXT:    lui a5, 524288
+; RV32IZFINXZDINX-NEXT:    li a6, 1
 ; RV32IZFINXZDINX-NEXT:    lui a4, 524288
-; RV32IZFINXZDINX-NEXT:    lui a2, 524288
-; RV32IZFINXZDINX-NEXT:    beqz s0, .LBB5_2
+; RV32IZFINXZDINX-NEXT:    bne s2, a6, .LBB5_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a2, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB5_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI5_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI5_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI5_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
 ; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB5_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB5_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT:    neg a4, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a4, a2
-; RV32IZFINXZDINX-NEXT:    neg a2, s0
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a3
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a4, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -474,7 +494,8 @@ define i64 @test_ceil_ui64(double %x) nounwind {
 ; RV32IFD-NEXT:    neg s0, a0
 ; RV32IFD-NEXT:    fcvt.d.w fa5, zero
 ; RV32IFD-NEXT:    fle.d a0, fa5, fa0
-; RV32IFD-NEXT:    neg s1, a0
+; RV32IFD-NEXT:    xori a0, a0, 1
+; RV32IFD-NEXT:    addi s1, a0, -1
 ; RV32IFD-NEXT:    call __fixunsdfdi
 ; RV32IFD-NEXT:    and a0, s1, a0
 ; RV32IFD-NEXT:    or a0, s0, a0
@@ -501,29 +522,28 @@ define i64 @test_ceil_ui64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call ceil
 ; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
 ; RV32IZFINXZDINX-NEXT:    fcvt.d.w a2, zero
 ; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg s2, a2
-; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI7_0)
-; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI7_0+4)(a2)
-; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI7_0)(a2)
-; RV32IZFINXZDINX-NEXT:    and a0, s2, a0
-; RV32IZFINXZDINX-NEXT:    flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a1, s2, a1
-; RV32IZFINXZDINX-NEXT:    or a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    lui a3, %hi(.LCPI7_0)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI7_0)(a3)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI7_0+4)(a3)
+; RV32IZFINXZDINX-NEXT:    xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT:    addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a3, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a3, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    or a1, a3, a1
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -582,41 +602,48 @@ define signext i32 @test_trunc_si32(double %x) {
 define i64 @test_trunc_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_trunc_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call trunc
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI9_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI9_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
 ; RV32IFD-NEXT:    lui a4, 524288
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    beqz s0, .LBB9_2
+; RV32IFD-NEXT:    li a5, 1
+; RV32IFD-NEXT:    lui a3, 524288
+; RV32IFD-NEXT:    bne s2, a5, .LBB9_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB9_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI9_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI9_1)(a1)
-; RV32IFD-NEXT:    flt.d a3, fa5, fs0
-; RV32IFD-NEXT:    beqz a3, .LBB9_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB9_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a4, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB9_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a4, a1
-; RV32IFD-NEXT:    and a1, a4, a2
-; RV32IFD-NEXT:    neg a2, a3
-; RV32IFD-NEXT:    neg a3, s0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_trunc_si64:
@@ -633,44 +660,47 @@ define i64 @test_trunc_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, -32
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call trunc
-; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT:    lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT:    sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT:    lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp)
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI9_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI9_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI9_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT:    fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg s3, s2
 ; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI9_1)
+; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI9_1+4)(a2)
+; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI9_1)(a2)
+; RV32IZFINXZDINX-NEXT:    and a0, s3, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
+; RV32IZFINXZDINX-NEXT:    lui a5, 524288
+; RV32IZFINXZDINX-NEXT:    li a6, 1
 ; RV32IZFINXZDINX-NEXT:    lui a4, 524288
-; RV32IZFINXZDINX-NEXT:    lui a2, 524288
-; RV32IZFINXZDINX-NEXT:    beqz s0, .LBB9_2
+; RV32IZFINXZDINX-NEXT:    bne s2, a6, .LBB9_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a2, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB9_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI9_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI9_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI9_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
 ; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB9_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB9_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT:    neg a4, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a4, a2
-; RV32IZFINXZDINX-NEXT:    neg a2, s0
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a3
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a4, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -740,7 +770,8 @@ define i64 @test_trunc_ui64(double %x) nounwind {
 ; RV32IFD-NEXT:    neg s0, a0
 ; RV32IFD-NEXT:    fcvt.d.w fa5, zero
 ; RV32IFD-NEXT:    fle.d a0, fa5, fa0
-; RV32IFD-NEXT:    neg s1, a0
+; RV32IFD-NEXT:    xori a0, a0, 1
+; RV32IFD-NEXT:    addi s1, a0, -1
 ; RV32IFD-NEXT:    call __fixunsdfdi
 ; RV32IFD-NEXT:    and a0, s1, a0
 ; RV32IFD-NEXT:    or a0, s0, a0
@@ -767,29 +798,28 @@ define i64 @test_trunc_ui64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call trunc
 ; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
 ; RV32IZFINXZDINX-NEXT:    fcvt.d.w a2, zero
 ; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg s2, a2
-; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI11_0)
-; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI11_0+4)(a2)
-; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI11_0)(a2)
-; RV32IZFINXZDINX-NEXT:    and a0, s2, a0
-; RV32IZFINXZDINX-NEXT:    flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a1, s2, a1
-; RV32IZFINXZDINX-NEXT:    or a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    lui a3, %hi(.LCPI11_0)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI11_0)(a3)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI11_0+4)(a3)
+; RV32IZFINXZDINX-NEXT:    xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT:    addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a3, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a3, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    or a1, a3, a1
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -848,41 +878,48 @@ define signext i32 @test_round_si32(double %x) {
 define i64 @test_round_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_round_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call round
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI13_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
 ; RV32IFD-NEXT:    lui a4, 524288
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    beqz s0, .LBB13_2
+; RV32IFD-NEXT:    li a5, 1
+; RV32IFD-NEXT:    lui a3, 524288
+; RV32IFD-NEXT:    bne s2, a5, .LBB13_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB13_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI13_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI13_1)(a1)
-; RV32IFD-NEXT:    flt.d a3, fa5, fs0
-; RV32IFD-NEXT:    beqz a3, .LBB13_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB13_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a4, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB13_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a4, a1
-; RV32IFD-NEXT:    and a1, a4, a2
-; RV32IFD-NEXT:    neg a2, a3
-; RV32IFD-NEXT:    neg a3, s0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_round_si64:
@@ -899,44 +936,47 @@ define i64 @test_round_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, -32
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call round
-; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT:    lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT:    sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT:    lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp)
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI13_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI13_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI13_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT:    fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg s3, s2
 ; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI13_1)
+; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI13_1+4)(a2)
+; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI13_1)(a2)
+; RV32IZFINXZDINX-NEXT:    and a0, s3, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
+; RV32IZFINXZDINX-NEXT:    lui a5, 524288
+; RV32IZFINXZDINX-NEXT:    li a6, 1
 ; RV32IZFINXZDINX-NEXT:    lui a4, 524288
-; RV32IZFINXZDINX-NEXT:    lui a2, 524288
-; RV32IZFINXZDINX-NEXT:    beqz s0, .LBB13_2
+; RV32IZFINXZDINX-NEXT:    bne s2, a6, .LBB13_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a2, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB13_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI13_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI13_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI13_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
 ; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB13_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB13_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT:    neg a4, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a4, a2
-; RV32IZFINXZDINX-NEXT:    neg a2, s0
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a3
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a4, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -1006,7 +1046,8 @@ define i64 @test_round_ui64(double %x) nounwind {
 ; RV32IFD-NEXT:    neg s0, a0
 ; RV32IFD-NEXT:    fcvt.d.w fa5, zero
 ; RV32IFD-NEXT:    fle.d a0, fa5, fa0
-; RV32IFD-NEXT:    neg s1, a0
+; RV32IFD-NEXT:    xori a0, a0, 1
+; RV32IFD-NEXT:    addi s1, a0, -1
 ; RV32IFD-NEXT:    call __fixunsdfdi
 ; RV32IFD-NEXT:    and a0, s1, a0
 ; RV32IFD-NEXT:    or a0, s0, a0
@@ -1033,29 +1074,28 @@ define i64 @test_round_ui64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call round
 ; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
 ; RV32IZFINXZDINX-NEXT:    fcvt.d.w a2, zero
 ; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg s2, a2
-; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI15_0)
-; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI15_0+4)(a2)
-; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI15_0)(a2)
-; RV32IZFINXZDINX-NEXT:    and a0, s2, a0
-; RV32IZFINXZDINX-NEXT:    flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a1, s2, a1
-; RV32IZFINXZDINX-NEXT:    or a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    lui a3, %hi(.LCPI15_0)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI15_0)(a3)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI15_0+4)(a3)
+; RV32IZFINXZDINX-NEXT:    xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT:    addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a3, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a3, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    or a1, a3, a1
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -1114,41 +1154,48 @@ define signext i32 @test_roundeven_si32(double %x) {
 define i64 @test_roundeven_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_roundeven_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call roundeven
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI17_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI17_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
 ; RV32IFD-NEXT:    lui a4, 524288
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    beqz s0, .LBB17_2
+; RV32IFD-NEXT:    li a5, 1
+; RV32IFD-NEXT:    lui a3, 524288
+; RV32IFD-NEXT:    bne s2, a5, .LBB17_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB17_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI17_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI17_1)(a1)
-; RV32IFD-NEXT:    flt.d a3, fa5, fs0
-; RV32IFD-NEXT:    beqz a3, .LBB17_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB17_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a4, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB17_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a4, a1
-; RV32IFD-NEXT:    and a1, a4, a2
-; RV32IFD-NEXT:    neg a2, a3
-; RV32IFD-NEXT:    neg a3, s0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_roundeven_si64:
@@ -1165,44 +1212,47 @@ define i64 @test_roundeven_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, -32
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call roundeven
-; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT:    lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT:    sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT:    lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT:    lw s1, 4(sp)
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI17_0)
 ; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI17_0+4)(a2)
 ; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI17_0)(a2)
-; RV32IZFINXZDINX-NEXT:    fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT:    fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg s3, s2
 ; RV32IZFINXZDINX-NEXT:    call __fixdfdi
+; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI17_1)
+; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI17_1+4)(a2)
+; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI17_1)(a2)
+; RV32IZFINXZDINX-NEXT:    and a0, s3, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT:    neg a2, a2
+; RV32IZFINXZDINX-NEXT:    lui a5, 524288
+; RV32IZFINXZDINX-NEXT:    li a6, 1
 ; RV32IZFINXZDINX-NEXT:    lui a4, 524288
-; RV32IZFINXZDINX-NEXT:    lui a2, 524288
-; RV32IZFINXZDINX-NEXT:    beqz s0, .LBB17_2
+; RV32IZFINXZDINX-NEXT:    bne s2, a6, .LBB17_2
 ; RV32IZFINXZDINX-NEXT:  # %bb.1:
-; RV32IZFINXZDINX-NEXT:    mv a2, a1
+; RV32IZFINXZDINX-NEXT:    mv a4, a1
 ; RV32IZFINXZDINX-NEXT:  .LBB17_2:
-; RV32IZFINXZDINX-NEXT:    lui a1, %hi(.LCPI17_1)
-; RV32IZFINXZDINX-NEXT:    lw a6, %lo(.LCPI17_1)(a1)
-; RV32IZFINXZDINX-NEXT:    lw a7, %lo(.LCPI17_1+4)(a1)
-; RV32IZFINXZDINX-NEXT:    flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
 ; RV32IZFINXZDINX-NEXT:    beqz a3, .LBB17_4
 ; RV32IZFINXZDINX-NEXT:  # %bb.3:
-; RV32IZFINXZDINX-NEXT:    addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT:    addi a4, a5, -1
 ; RV32IZFINXZDINX-NEXT:  .LBB17_4:
-; RV32IZFINXZDINX-NEXT:    feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT:    neg a4, a1
-; RV32IZFINXZDINX-NEXT:    and a1, a4, a2
-; RV32IZFINXZDINX-NEXT:    neg a2, s0
-; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    neg a2, a3
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a0, a4, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a4
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -1272,7 +1322,8 @@ define i64 @test_roundeven_ui64(double %x) nounwind {
 ; RV32IFD-NEXT:    neg s0, a0
 ; RV32IFD-NEXT:    fcvt.d.w fa5, zero
 ; RV32IFD-NEXT:    fle.d a0, fa5, fa0
-; RV32IFD-NEXT:    neg s1, a0
+; RV32IFD-NEXT:    xori a0, a0, 1
+; RV32IFD-NEXT:    addi s1, a0, -1
 ; RV32IFD-NEXT:    call __fixunsdfdi
 ; RV32IFD-NEXT:    and a0, s1, a0
 ; RV32IFD-NEXT:    or a0, s0, a0
@@ -1299,29 +1350,28 @@ define i64 @test_roundeven_ui64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call roundeven
 ; RV32IZFINXZDINX-NEXT:    sw a0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    sw a1, 12(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s0, 8(sp)
 ; RV32IZFINXZDINX-NEXT:    lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
 ; RV32IZFINXZDINX-NEXT:    fcvt.d.w a2, zero
 ; RV32IZFINXZDINX-NEXT:    fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg s2, a2
-; RV32IZFINXZDINX-NEXT:    call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT:    lui a2, %hi(.LCPI19_0)
-; RV32IZFINXZDINX-NEXT:    lw a3, %lo(.LCPI19_0+4)(a2)
-; RV32IZFINXZDINX-NEXT:    lw a2, %lo(.LCPI19_0)(a2)
-; RV32IZFINXZDINX-NEXT:    and a0, s2, a0
-; RV32IZFINXZDINX-NEXT:    flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT:    neg a2, a2
-; RV32IZFINXZDINX-NEXT:    or a0, a2, a0
-; RV32IZFINXZDINX-NEXT:    and a1, s2, a1
-; RV32IZFINXZDINX-NEXT:    or a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    lui a3, %hi(.LCPI19_0)
+; RV32IZFINXZDINX-NEXT:    lw a4, %lo(.LCPI19_0)(a3)
+; RV32IZFINXZDINX-NEXT:    lw a5, %lo(.LCPI19_0+4)(a3)
+; RV32IZFINXZDINX-NEXT:    xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT:    addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT:    and a0, a2, a0
+; RV32IZFINXZDINX-NEXT:    flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT:    neg a3, a3
+; RV32IZFINXZDINX-NEXT:    or a0, a3, a0
+; RV32IZFINXZDINX-NEXT:    and a1, a2, a1
+; RV32IZFINXZDINX-NEXT:    or a1, a3, a1
 ; RV32IZFINXZDINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, 32
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
@@ -1380,41 +1430,48 @@ define signext i32 @test_rint_si32(double %x) {
 define i64 @test_rint_si64(double %x) nounwind {
 ; RV32IFD-LABEL: test_rint_si64:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT:    call rint
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI21_0)
 ; RV32IFD-NEXT:    fld fa5, %lo(.LCPI21_0)(a0)
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI21_1)
+; RV32IFD-NEXT:    fld fa4, %lo(.LCPI21_1)(a0)
 ; RV32IFD-NEXT:    fmv.d fs0, fa0
-; RV32IFD-NEXT:    fle.d s0, fa5, fa0
+; RV32IFD-NEXT:    flt.d s0, fa5, fa0
+; RV32IFD-NEXT:    neg s1, s0
+; RV32IFD-NEXT:    fle.d s2, fa4, fa0
+; RV32IFD-NEXT:    neg s3, s2
 ; RV32IFD-NEXT:    call __fixdfdi
+; RV32IFD-NEXT:    and a0, s3, a0
+; RV32IFD-NEXT:    or a0, s1, a0
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    neg a2, a2
 ; RV32IFD-NEXT:    lui a4, 524288
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    beqz s0, .LBB21_2
+; RV32IFD-NEXT:    li a5, 1
+; RV32IFD-NEXT:    lui a3, 524288
+; RV32IFD-NEXT:    bne s2, a5, .LBB21_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    mv a3, a1
 ; RV32IFD-NEXT:  .LBB21_2:
-; RV32IFD-NEXT:    lui a1, %hi(.LCPI21_1)
-; RV32IFD-NEXT:    fld fa5, %lo(.LCPI21_1)(a1)
-; RV32IFD-NEXT:    flt.d a3, fa5, fs0
-; RV32IFD-NEXT:    beqz a3, .LBB21_4
+; RV32IFD-NEXT:    and a0, a2, a0
+; RV32IFD-NEXT:    beqz s0, .LBB21_4
 ; RV32IFD-NEXT:  # %bb.3:
-; RV32IFD-NEXT:    addi a2, a4, -1
+; RV32IFD-NEXT:    addi a3, a4, -1
 ; RV32IFD-NEXT:  .LBB21_4:
-; RV32IFD-NEXT:    feq.d a1, fs0, fs0
-; RV32IFD-NEXT:    neg a4, a1
-; RV32IFD-NEXT:    and a1, a4, a2
-; RV32IFD-NEXT:    neg a2, a3
-; RV32IFD-NEXT:    neg a3, s0
-; RV32IFD-NEXT:    and a0, a3, a0
-; RV32IFD-NEXT:    or a0, a2, a0
-; RV32IFD-NEXT:    and a0, a4, a0
-; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    and a1, a2, a3
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: test_rint_si64:
@@ -1431,44 +1488,47 @@ define i64 @test_rint_si64(double %x) nounwind {
 ; RV32IZFINXZDINX-NEXT:    addi sp, sp, -32
 ; RV32IZFINXZDINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINXZDINX-NEXT:    call rint
-; RV32IZFINXZDINX-NEXT:
sw a0, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 0(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_0)(a2) -; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s3, s2 ; RV32IZFINXZDINX-NEXT: call __fixdfdi +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_1) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s3, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: li a6, 1 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s0, .LBB21_2 +; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB21_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a4, a1 ; RV32IZFINXZDINX-NEXT: .LBB21_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI21_1)(a1) -; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI21_1+4)(a1) -; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a4, a5, -1 ; RV32IZFINXZDINX-NEXT: .LBB21_4: -; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: neg a2, s0 -; RV32IZFINXZDINX-NEXT: and a0, a2, a0 -; RV32IZFINXZDINX-NEXT: neg a2, a3 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a4 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1538,7 +1598,8 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: addi s1, a0, -1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: or a0, s0, a0 @@ -1565,29 +1626,28 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call rint ; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) ; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: 
fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a2 -; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI23_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI23_0)(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI23_0+4)(a3) +; RV32IZFINXZDINX-NEXT: xori a2, a2, 1 +; RV32IZFINXZDINX-NEXT: addi a2, a2, -1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV32IZFINXZDINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index f1e444b5b624b4..1a0e4e18291158 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -275,24 +275,26 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: or a0, a0, s1 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -613,40 +615,47 @@ define i64 @fcvt_l_s(float %a) nounwind { define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IF-LABEL: fcvt_l_s_sat: ; RV32IF: # %bb.0: # %start -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: addi sp, sp, -32 +; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a0) ; RV32IF-NEXT: fmv.s fs0, fa0 
+; RV32IF-NEXT: flt.s s0, fa5, fa0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 -; RV32IF-NEXT: fle.s s0, fa5, fa0 +; RV32IF-NEXT: fle.s s2, fa5, fa0 +; RV32IF-NEXT: neg s3, s2 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: and a0, s3, a0 +; RV32IF-NEXT: or a0, s1, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB12_2 +; RV32IF-NEXT: li a5, 1 +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: bne s2, a5, .LBB12_2 ; RV32IF-NEXT: # %bb.1: # %start -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a3, a1 ; RV32IF-NEXT: .LBB12_2: # %start -; RV32IF-NEXT: lui a1, %hi(.LCPI12_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: beqz a3, .LBB12_4 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: beqz s0, .LBB12_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a3, a4, -1 ; RV32IF-NEXT: .LBB12_4: # %start -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: neg a3, s0 -; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: and a1, a2, a3 +; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 32 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_l_s_sat: @@ -664,35 +673,38 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 913408 ; RV32IZFINX-NEXT: fle.s s1, a0, s0 +; RV32IZFINX-NEXT: neg s2, s1 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixsfdi +; RV32IZFINX-NEXT: lui a2, %hi(.LCPI12_0) +; RV32IZFINX-NEXT: lw a2, %lo(.LCPI12_0)(a2) +; RV32IZFINX-NEXT: and a0, s2, a0 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: feq.s a2, s0, s0 +; RV32IZFINX-NEXT: neg a2, a2 +; RV32IZFINX-NEXT: lui a5, 524288 +; RV32IZFINX-NEXT: li a6, 1 ; RV32IZFINX-NEXT: lui a4, 524288 -; RV32IZFINX-NEXT: lui a2, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB12_2 +; RV32IZFINX-NEXT: bne s1, a6, .LBB12_2 ; RV32IZFINX-NEXT: # %bb.1: # %start -; RV32IZFINX-NEXT: mv a2, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB12_2: # %start -; RV32IZFINX-NEXT: lui a1, %hi(.LCPI12_0) -; RV32IZFINX-NEXT: lw a1, %lo(.LCPI12_0)(a1) -; RV32IZFINX-NEXT: flt.s a3, a1, s0 +; RV32IZFINX-NEXT: and a0, a2, a0 ; RV32IZFINX-NEXT: beqz a3, .LBB12_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: addi a2, a4, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB12_4: # %start -; RV32IZFINX-NEXT: feq.s a1, s0, s0 -; RV32IZFINX-NEXT: neg a4, a1 -; RV32IZFINX-NEXT: and a1, a4, a2 -; RV32IZFINX-NEXT: neg a2, s1 -; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: neg a2, a3 -; 
RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a0, a4, a0 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -863,23 +875,23 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a0, %hi(.LCPI14_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a0) +; RV32IF-NEXT: flt.s a0, fa5, fa0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.w.x fa5, zero ; RV32IF-NEXT: fle.s a0, fa5, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: addi s1, a0, -1 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: or a0, s0, a0 +; RV32IF-NEXT: and a1, s1, a1 +; RV32IF-NEXT: or a1, s0, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -898,19 +910,18 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: mv s0, a0 -; RV32IZFINX-NEXT: fle.s a0, zero, a0 -; RV32IZFINX-NEXT: neg s1, a0 -; RV32IZFINX-NEXT: mv a0, s0 +; RV32IZFINX-NEXT: lui a1, %hi(.LCPI14_0) +; RV32IZFINX-NEXT: lw a1, %lo(.LCPI14_0)(a1) +; RV32IZFINX-NEXT: flt.s a1, a1, a0 +; RV32IZFINX-NEXT: neg s0, a1 +; RV32IZFINX-NEXT: fle.s a1, zero, a0 +; RV32IZFINX-NEXT: xori a1, a1, 1 +; RV32IZFINX-NEXT: addi s1, a1, -1 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI14_0)(a2) ; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: or a0, s0, a0 ; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: or a1, s0, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -928,36 +939,33 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; ; RV32I-LABEL: fcvt_lu_s_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; 
RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 391168 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 ; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: and s3, s2, a0 -; RV32I-NEXT: lui a1, 391168 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a1, a0 -; RV32I-NEXT: or a0, a1, s3 -; RV32I-NEXT: and a2, s2, s1 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: and a1, s2, a1 +; RV32I-NEXT: or a1, s1, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_lu_s_sat: @@ -966,24 +974,26 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lui a1, 391168 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: call __gtsf2 +; RV64I-NEXT: sgtz a0, a0 +; RV64I-NEXT: neg s1, a0 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2 ; RV64I-NEXT: slti a0, a0, 0 -; RV64I-NEXT: addi s1, a0, -1 +; RV64I-NEXT: addi s2, a0, -1 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixunssfdi -; RV64I-NEXT: and s1, s1, a0 -; RV64I-NEXT: lui a1, 391168 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __gtsf2 -; RV64I-NEXT: sgtz a0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: or a0, a0, s1 +; RV64I-NEXT: and a0, s2, a0 +; RV64I-NEXT: or a0, s1, a0 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret start: @@ -2089,24 +2099,26 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi -; RV32I-NEXT: and s1, s1, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: or a0, a0, s1 +; RV32I-NEXT: and a0, s2, a0 +; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll index 5e99c7eb905628..f91aac11876d41 100644 --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -37,7 +37,8 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -52,32 +53,34 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI1_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB1_4 +; RV32IF-NEXT: bne s0, a6, .LBB1_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB1_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB1_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB1_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -115,23 +118,24 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB1_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB1_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB1_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB1_6 +; RV32IZFINX-NEXT: beqz a3, .LBB1_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi 
a4, a5, -1 ; RV32IZFINX-NEXT: .LBB1_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -180,8 +184,7 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -193,22 +196,22 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rdn ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB3_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -226,7 +229,6 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -237,21 +239,21 @@ define i64 @test_floor_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rdn ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB3_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI3_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ 
-297,7 +299,8 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -312,32 +315,34 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI5_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB5_4 +; RV32IF-NEXT: bne s0, a6, .LBB5_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB5_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB5_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB5_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -375,23 +380,24 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB5_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB5_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB5_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB5_6 +; RV32IZFINX-NEXT: beqz a3, .LBB5_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB5_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -440,8 +446,7 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 
4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -453,22 +458,22 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rup ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB7_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -486,7 +491,6 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -497,21 +501,21 @@ define i64 @test_ceil_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rup ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB7_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI7_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -557,7 +561,8 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -572,32 +577,34 @@ define i64 
@test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB9_4 +; RV32IF-NEXT: bne s0, a6, .LBB9_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB9_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB9_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB9_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -635,23 +642,24 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB9_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB9_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB9_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB9_6 +; RV32IZFINX-NEXT: beqz a3, .LBB9_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB9_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -700,8 +708,7 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -713,22 +720,22 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rtz ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB11_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, 
a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -746,7 +753,6 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -757,21 +763,21 @@ define i64 @test_trunc_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rtz ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB11_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI11_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -817,7 +823,8 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -832,32 +839,34 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI13_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; 
RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB13_4 +; RV32IF-NEXT: bne s0, a6, .LBB13_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB13_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: and a0, a2, a0 ; RV32IF-NEXT: beqz a3, .LBB13_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: addi a4, a5, -1 ; RV32IF-NEXT: .LBB13_6: -; RV32IF-NEXT: feq.s a1, fs0, fs0 -; RV32IF-NEXT: neg a4, a1 -; RV32IF-NEXT: and a1, a4, a2 -; RV32IF-NEXT: neg a2, s0 -; RV32IF-NEXT: and a0, a2, a0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: and a1, a2, a4 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -895,23 +904,24 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a4, a2, s0 -; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: flt.s a3, a2, s0 +; RV32IZFINX-NEXT: neg a2, a3 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a3, 524288 -; RV32IZFINX-NEXT: beqz s1, .LBB13_4 +; RV32IZFINX-NEXT: li a6, 1 +; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: bne s1, a6, .LBB13_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: mv a4, a1 ; RV32IZFINX-NEXT: .LBB13_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a4, .LBB13_6 +; RV32IZFINX-NEXT: beqz a3, .LBB13_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: addi a4, a5, -1 ; RV32IZFINX-NEXT: .LBB13_6: -; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: and a1, a2, a4 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -960,8 +970,7 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -973,22 +982,22 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IF-NEXT: fcvt.s.w fa5, a0, rmm ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 ; RV32IF-NEXT: .LBB15_2: -; RV32IF-NEXT: fmv.w.x fa5, zero -; RV32IF-NEXT: fle.s a0, fa5, fs0 -; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a2) -; RV32IF-NEXT: and a0, s0, a0 -; RV32IF-NEXT: flt.s a2, fa5, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: and a1, s0, a1 -; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a2, fa5, fs0 +; RV32IF-NEXT: lui a3, %hi(.LCPI15_0) +; 
RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a3) +; RV32IF-NEXT: xori a2, a2, 1 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 +; RV32IF-NEXT: or a1, a3, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1006,7 +1015,6 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: addi sp, sp, -16 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: mv s0, a0 ; RV32IZFINX-NEXT: lui a0, 307200 ; RV32IZFINX-NEXT: fabs.s a1, s0 @@ -1017,21 +1025,21 @@ define i64 @test_round_ui64(float %x) nounwind { ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rmm ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 ; RV32IZFINX-NEXT: .LBB15_2: -; RV32IZFINX-NEXT: fle.s a0, zero, s0 -; RV32IZFINX-NEXT: neg s1, a0 ; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi -; RV32IZFINX-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFINX-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: flt.s a2, a2, s0 -; RV32IZFINX-NEXT: neg a2, a2 -; RV32IZFINX-NEXT: or a0, a2, a0 -; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: fle.s a2, zero, s0 +; RV32IZFINX-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFINX-NEXT: lw a3, %lo(.LCPI15_0)(a3) +; RV32IZFINX-NEXT: xori a2, a2, 1 +; RV32IZFINX-NEXT: addi a2, a2, -1 +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: flt.s a3, a3, s0 +; RV32IZFINX-NEXT: neg a3, a3 +; RV32IZFINX-NEXT: or a0, a3, a0 +; RV32IZFINX-NEXT: and a1, a2, a1 +; RV32IZFINX-NEXT: or a1, a3, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: addi sp, sp, 16 ; RV32IZFINX-NEXT: ret ; @@ -1077,7 +1085,8 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1092,32 +1101,34 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi +; RV32IF-NEXT: lui a2, %hi(.LCPI17_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a2) +; RV32IF-NEXT: and a0, s1, a0 +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: li a6, 1 ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: beqz s0, .LBB17_4 +; RV32IF-NEXT: bne s0, a6, .LBB17_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a4, a1 ; RV32IF-NEXT: .LBB17_4: -; RV32IF-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a1) -; RV32IF-NEXT: flt.s a3, fa5, fs0 +; 
+; RV32IF-NEXT: and a0, a2, a0
 ; RV32IF-NEXT: beqz a3, .LBB17_6
 ; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
 ; RV32IF-NEXT: .LBB17_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 16
 ; RV32IF-NEXT: ret
 ;
@@ -1155,23 +1166,24 @@ define i64 @test_roundeven_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI17_0)
 ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI17_0)(a2)
 ; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
 ; RV32IZFINX-NEXT: or a0, a2, a0
 ; RV32IZFINX-NEXT: feq.s a2, s0, s0
 ; RV32IZFINX-NEXT: neg a2, a2
 ; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB17_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB17_4
 ; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
 ; RV32IZFINX-NEXT: .LBB17_4:
 ; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB17_6
+; RV32IZFINX-NEXT: beqz a3, .LBB17_6
 ; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
 ; RV32IZFINX-NEXT: .LBB17_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -1220,8 +1232,7 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
 ; RV32IF: # %bb.0:
 ; RV32IF-NEXT: addi sp, sp, -16
 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
 ; RV32IF-NEXT: fmv.s fs0, fa0
 ; RV32IF-NEXT: lui a0, 307200
 ; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -1233,22 +1244,22 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
 ; RV32IF-NEXT: fcvt.s.w fa5, a0, rne
 ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
 ; RV32IF-NEXT: .LBB19_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
 ; RV32IF-NEXT: fmv.s fa0, fs0
 ; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI19_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI19_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 16
 ; RV32IF-NEXT: ret
 ;
@@ -1266,7 +1277,6 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT: addi sp, sp, -16
 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT: mv s0, a0
 ; RV32IZFINX-NEXT: lui a0, 307200
 ; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -1277,21 +1287,21 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rne
 ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
 ; RV32IZFINX-NEXT: .LBB19_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
 ; RV32IZFINX-NEXT: mv a0, s0
 ; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI19_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI19_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI19_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI19_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: addi sp, sp, 16
 ; RV32IZFINX-NEXT: ret
 ;
@@ -1337,7 +1347,8 @@ define i64 @test_rint_si64(float %x) nounwind {
 ; RV32IF-NEXT: addi sp, sp, -16
 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
 ; RV32IF-NEXT: fmv.s fs0, fa0
 ; RV32IF-NEXT: lui a0, 307200
 ; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -1352,32 +1363,34 @@ define i64 @test_rint_si64(float %x) nounwind {
 ; RV32IF-NEXT: lui a0, 913408
 ; RV32IF-NEXT: fmv.w.x fa5, a0
 ; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: neg s1, s0
 ; RV32IF-NEXT: fmv.s fa0, fs0
 ; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: lui a2, %hi(.LCPI21_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a2)
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: lui a5, 524288
+; RV32IF-NEXT: li a6, 1
 ; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB21_4
+; RV32IF-NEXT: bne s0, a6, .LBB21_4
 ; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a4, a1
 ; RV32IF-NEXT: .LBB21_4:
-; RV32IF-NEXT: lui a1, %hi(.LCPI21_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: and a0, a2, a0
 ; RV32IF-NEXT: beqz a3, .LBB21_6
 ; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
 ; RV32IF-NEXT: .LBB21_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 16
 ; RV32IF-NEXT: ret
 ;
@@ -1415,23 +1428,24 @@ define i64 @test_rint_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI21_0)
 ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI21_0)(a2)
 ; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
 ; RV32IZFINX-NEXT: or a0, a2, a0
 ; RV32IZFINX-NEXT: feq.s a2, s0, s0
 ; RV32IZFINX-NEXT: neg a2, a2
 ; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB21_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB21_4
 ; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
 ; RV32IZFINX-NEXT: .LBB21_4:
 ; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB21_6
+; RV32IZFINX-NEXT: beqz a3, .LBB21_6
 ; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
 ; RV32IZFINX-NEXT: .LBB21_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -1480,8 +1494,7 @@ define i64 @test_rint_ui64(float %x) nounwind {
 ; RV32IF: # %bb.0:
 ; RV32IF-NEXT: addi sp, sp, -16
 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
 ; RV32IF-NEXT: fmv.s fs0, fa0
 ; RV32IF-NEXT: lui a0, 307200
 ; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -1493,22 +1506,22 @@ define i64 @test_rint_ui64(float %x) nounwind {
 ; RV32IF-NEXT: fcvt.s.w fa5, a0
 ; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
 ; RV32IF-NEXT: .LBB23_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
 ; RV32IF-NEXT: fmv.s fa0, fs0
 ; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI23_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI23_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 16
 ; RV32IF-NEXT: ret
 ;
@@ -1526,7 +1539,6 @@ define i64 @test_rint_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT: addi sp, sp, -16
 ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT: mv s0, a0
 ; RV32IZFINX-NEXT: lui a0, 307200
 ; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -1537,21 +1549,21 @@ define i64 @test_rint_ui64(float %x) nounwind {
 ; RV32IZFINX-NEXT: fcvt.s.w a0, a0
 ; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
 ; RV32IZFINX-NEXT: .LBB23_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
 ; RV32IZFINX-NEXT: mv a0, s0
 ; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI23_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI23_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI23_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI23_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT: addi sp, sp, 16
 ; RV32IZFINX-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index f6a53a9d76dd35..2b198afb47a9ae 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -3567,8 +3567,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
 ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
 ; RV32-NEXT: neg a3, a0
 ; RV32-NEXT: and a3, a3, a1
-; RV32-NEXT: sw a4, 0(sp)
 ; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a4, 0(sp)
 ; RV32-NEXT: mv a1, sp
 ; RV32-NEXT: li a4, 5
 ; RV32-NEXT: li a5, 5
@@ -3672,7 +3672,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
 ; RV32-NEXT: .LBB52_2: # %atomicrmw.start
 ; RV32-NEXT: # =>This Inner Loop Header: Depth=1
 ; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: seqz a2, a1
+; RV32-NEXT: snez a2, a1
+; RV32-NEXT: addi a2, a2, -1
 ; RV32-NEXT: and a0, a2, a0
 ; RV32-NEXT: mv a2, a4
 ; RV32-NEXT: bnez a0, .LBB52_1
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 9e93ad0043a7e0..6bfacc3e9814b4 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -115,7 +115,8 @@ define i32 @utest_f64i32(double %x) {
 ; RV32IF-NEXT: .cfi_offset ra, -4
 ; RV32IF-NEXT: call __fixunsdfdi
 ; RV32IF-NEXT: sltiu a2, a0, -1
-; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: snez a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
 ; RV32IF-NEXT: and a1, a1, a2
 ; RV32IF-NEXT: addi a1, a1, -1
 ; RV32IF-NEXT: or a0, a1, a0
@@ -430,7 +431,8 @@ define i32 @utesth_f16i32(half %x) {
 ; RV32-NEXT: call __extendhfsf2
 ; RV32-NEXT: call __fixunssfdi
 ; RV32-NEXT: sltiu a2, a0, -1
-; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: snez a1, a1
+; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: and a1, a1, a2
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: or a0, a1, a0
@@ -1043,8 +1045,8 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT: mv a1, a0
 ; RV32IF-NEXT: addi a0, sp, 8
 ; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 20(sp)
-; RV32IF-NEXT: lw a2, 16(sp)
+; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: lw a2, 20(sp)
 ; RV32IF-NEXT: lw a1, 12(sp)
 ; RV32IF-NEXT: lw a4, 8(sp)
 ; RV32IF-NEXT: lui a3, 524288
@@ -1052,25 +1054,25 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT: beq a1, a5, .LBB18_2
 ; RV32IF-NEXT: # %bb.1: # %entry
 ; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
 ; RV32IF-NEXT: bnez a7, .LBB18_3
 ; RV32IF-NEXT: j .LBB18_4
 ; RV32IF-NEXT: .LBB18_2:
 ; RV32IF-NEXT: sltiu a6, a4, -1
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
 ; RV32IF-NEXT: beqz a7, .LBB18_4
 ; RV32IF-NEXT: .LBB18_3: # %entry
-; RV32IF-NEXT: slti a6, a0, 0
+; RV32IF-NEXT: slti a6, a2, 0
 ; RV32IF-NEXT: .LBB18_4: # %entry
-; RV32IF-NEXT: neg a7, a6
-; RV32IF-NEXT: addi t0, a6, -1
+; RV32IF-NEXT: addi a7, a6, -1
+; RV32IF-NEXT: neg t0, a6
 ; RV32IF-NEXT: bnez a6, .LBB18_6
 ; RV32IF-NEXT: # %bb.5: # %entry
 ; RV32IF-NEXT: mv a1, a5
 ; RV32IF-NEXT: .LBB18_6: # %entry
-; RV32IF-NEXT: or a4, t0, a4
-; RV32IF-NEXT: and a5, a7, a0
-; RV32IF-NEXT: and a2, a7, a2
+; RV32IF-NEXT: or a4, a7, a4
+; RV32IF-NEXT: and a2, t0, a2
+; RV32IF-NEXT: and a5, t0, a0
 ; RV32IF-NEXT: beq a1, a3, .LBB18_8
 ; RV32IF-NEXT: # %bb.7: # %entry
 ; RV32IF-NEXT: sltu a0, a3, a1
@@ -1078,11 +1080,11 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT: .LBB18_8:
 ; RV32IF-NEXT: snez a0, a4
 ; RV32IF-NEXT: .LBB18_9: # %entry
-; RV32IF-NEXT: and a2, a2, a5
+; RV32IF-NEXT: and a5, a5, a2
 ; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a2, a3, .LBB18_11
+; RV32IF-NEXT: beq a5, a3, .LBB18_11
 ; RV32IF-NEXT: # %bb.10: # %entry
-; RV32IF-NEXT: slti a0, a5, 0
+; RV32IF-NEXT: slti a0, a2, 0
 ; RV32IF-NEXT: xori a0, a0, 1
 ; RV32IF-NEXT: .LBB18_11: # %entry
 ; RV32IF-NEXT: bnez a0, .LBB18_13
@@ -1142,8 +1144,8 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT: .cfi_offset ra, -4
 ; RV32IFD-NEXT: addi a0, sp, 8
 ; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 20(sp)
-; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a2, 20(sp)
 ; RV32IFD-NEXT: lw a1, 12(sp)
 ; RV32IFD-NEXT: lw a4, 8(sp)
 ; RV32IFD-NEXT: lui a3, 524288
@@ -1151,25 +1153,25 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT: beq a1, a5, .LBB18_2
 ; RV32IFD-NEXT: # %bb.1: # %entry
 ; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
 ; RV32IFD-NEXT: bnez a7, .LBB18_3
 ; RV32IFD-NEXT: j .LBB18_4
 ; RV32IFD-NEXT: .LBB18_2:
 ; RV32IFD-NEXT: sltiu a6, a4, -1
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
 ; RV32IFD-NEXT: beqz a7, .LBB18_4
 ; RV32IFD-NEXT: .LBB18_3: # %entry
-; RV32IFD-NEXT: slti a6, a0, 0
+; RV32IFD-NEXT: slti a6, a2, 0
 ; RV32IFD-NEXT: .LBB18_4: # %entry
-; RV32IFD-NEXT: neg a7, a6
-; RV32IFD-NEXT: addi t0, a6, -1
+; RV32IFD-NEXT: addi a7, a6, -1
+; RV32IFD-NEXT: neg t0, a6
 ; RV32IFD-NEXT: bnez a6, .LBB18_6
 ; RV32IFD-NEXT: # %bb.5: # %entry
 ; RV32IFD-NEXT: mv a1, a5
 ; RV32IFD-NEXT: .LBB18_6: # %entry
-; RV32IFD-NEXT: or a4, t0, a4
-; RV32IFD-NEXT: and a5, a7, a0
-; RV32IFD-NEXT: and a2, a7, a2
+; RV32IFD-NEXT: or a4, a7, a4
+; RV32IFD-NEXT: and a2, t0, a2
+; RV32IFD-NEXT: and a5, t0, a0
 ; RV32IFD-NEXT: beq a1, a3, .LBB18_8
 ; RV32IFD-NEXT: # %bb.7: # %entry
 ; RV32IFD-NEXT: sltu a0, a3, a1
@@ -1177,11 +1179,11 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT: .LBB18_8:
 ; RV32IFD-NEXT: snez a0, a4
 ; RV32IFD-NEXT: .LBB18_9: # %entry
-; RV32IFD-NEXT: and a2, a2, a5
+; RV32IFD-NEXT: and a5, a5, a2
 ; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a2, a3, .LBB18_11
+; RV32IFD-NEXT: beq a5, a3, .LBB18_11
 ; RV32IFD-NEXT: # %bb.10: # %entry
-; RV32IFD-NEXT: slti a0, a5, 0
+; RV32IFD-NEXT: slti a0, a2, 0
 ; RV32IFD-NEXT: xori a0, a0, 1
 ; RV32IFD-NEXT: .LBB18_11: # %entry
 ; RV32IFD-NEXT: bnez a0, .LBB18_13
@@ -1227,8 +1229,10 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IF-NEXT: lw a1, 20(sp)
 ; RV32IF-NEXT: lw a2, 12(sp)
 ; RV32IF-NEXT: lw a3, 8(sp)
-; RV32IF-NEXT: or a4, a1, a0
-; RV32IF-NEXT: seqz a4, a4
+; RV32IF-NEXT: seqz a4, a0
+; RV32IF-NEXT: snez a5, a1
+; RV32IF-NEXT: addi a5, a5, -1
+; RV32IF-NEXT: and a4, a5, a4
 ; RV32IF-NEXT: xori a0, a0, 1
 ; RV32IF-NEXT: or a0, a0, a1
 ; RV32IF-NEXT: seqz a0, a0
@@ -1267,8 +1271,10 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IFD-NEXT: lw a1, 20(sp)
 ; RV32IFD-NEXT: lw a2, 12(sp)
 ; RV32IFD-NEXT: lw a3, 8(sp)
-; RV32IFD-NEXT: or a4, a1, a0
-; RV32IFD-NEXT: seqz a4, a4
+; RV32IFD-NEXT: seqz a4, a0
+; RV32IFD-NEXT: snez a5, a1
+; RV32IFD-NEXT: addi a5, a5, -1
+; RV32IFD-NEXT: and a4, a5, a4
 ; RV32IFD-NEXT: xori a0, a0, 1
 ; RV32IFD-NEXT: or a0, a0, a1
 ; RV32IFD-NEXT: seqz a0, a0
@@ -1440,8 +1446,8 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT: .cfi_offset ra, -4
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
 ; RV32-NEXT: lw a1, 12(sp)
 ; RV32-NEXT: lw a4, 8(sp)
 ; RV32-NEXT: lui a3, 524288
@@ -1449,25 +1455,25 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT: beq a1, a5, .LBB21_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: bnez a7, .LBB21_3
 ; RV32-NEXT: j .LBB21_4
 ; RV32-NEXT: .LBB21_2:
 ; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: beqz a7, .LBB21_4
 ; RV32-NEXT: .LBB21_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
 ; RV32-NEXT: .LBB21_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
 ; RV32-NEXT: bnez a6, .LBB21_6
 ; RV32-NEXT: # %bb.5: # %entry
 ; RV32-NEXT: mv a1, a5
 ; RV32-NEXT: .LBB21_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
 ; RV32-NEXT: beq a1, a3, .LBB21_8
 ; RV32-NEXT: # %bb.7: # %entry
 ; RV32-NEXT: sltu a0, a3, a1
@@ -1475,11 +1481,11 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT: .LBB21_8:
 ; RV32-NEXT: snez a0, a4
 ; RV32-NEXT: .LBB21_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
 ; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB21_11
+; RV32-NEXT: beq a5, a3, .LBB21_11
 ; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: .LBB21_11: # %entry
 ; RV32-NEXT: bnez a0, .LBB21_13
@@ -1523,8 +1529,10 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT: lw a1, 20(sp)
 ; RV32-NEXT: lw a2, 12(sp)
 ; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: seqz a0, a0
@@ -1657,8 +1665,8 @@ define i64 @stest_f16i64(half %x) {
 ; RV32-NEXT: call __extendhfsf2
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
 ; RV32-NEXT: lw a1, 12(sp)
 ; RV32-NEXT: lw a4, 8(sp)
 ; RV32-NEXT: lui a3, 524288
@@ -1666,25 +1674,25 @@ define i64 @stest_f16i64(half %x) {
 ; RV32-NEXT: beq a1, a5, .LBB24_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: bnez a7, .LBB24_3
 ; RV32-NEXT: j .LBB24_4
 ; RV32-NEXT: .LBB24_2:
 ; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: beqz a7, .LBB24_4
 ; RV32-NEXT: .LBB24_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
 ; RV32-NEXT: .LBB24_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
 ; RV32-NEXT: bnez a6, .LBB24_6
 ; RV32-NEXT: # %bb.5: # %entry
 ; RV32-NEXT: mv a1, a5
 ; RV32-NEXT: .LBB24_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
 ; RV32-NEXT: beq a1, a3, .LBB24_8
 ; RV32-NEXT: # %bb.7: # %entry
 ; RV32-NEXT: sltu a0, a3, a1
@@ -1692,11 +1700,11 @@ define i64 @stest_f16i64(half %x) {
 ; RV32-NEXT: .LBB24_8:
 ; RV32-NEXT: snez a0, a4
 ; RV32-NEXT: .LBB24_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
 ; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB24_11
+; RV32-NEXT: beq a5, a3, .LBB24_11
 ; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: .LBB24_11: # %entry
 ; RV32-NEXT: bnez a0, .LBB24_13
@@ -1772,8 +1780,10 @@ define i64 @utesth_f16i64(half %x) {
 ; RV32-NEXT: lw a1, 20(sp)
 ; RV32-NEXT: lw a2, 12(sp)
 ; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: seqz a0, a0
@@ -2891,8 +2901,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT: mv a1, a0
 ; RV32IF-NEXT: addi a0, sp, 8
 ; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 20(sp)
-; RV32IF-NEXT: lw a2, 16(sp)
+; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: lw a2, 20(sp)
 ; RV32IF-NEXT: lw a1, 12(sp)
 ; RV32IF-NEXT: lw a4, 8(sp)
 ; RV32IF-NEXT: lui a3, 524288
@@ -2900,25 +2910,25 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT: beq a1, a5, .LBB45_2
 ; RV32IF-NEXT: # %bb.1: # %entry
 ; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
 ; RV32IF-NEXT: bnez a7, .LBB45_3
 ; RV32IF-NEXT: j .LBB45_4
 ; RV32IF-NEXT: .LBB45_2:
 ; RV32IF-NEXT: sltiu a6, a4, -1
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
 ; RV32IF-NEXT: beqz a7, .LBB45_4
 ; RV32IF-NEXT: .LBB45_3: # %entry
-; RV32IF-NEXT: slti a6, a0, 0
+; RV32IF-NEXT: slti a6, a2, 0
 ; RV32IF-NEXT: .LBB45_4: # %entry
-; RV32IF-NEXT: neg a7, a6
-; RV32IF-NEXT: addi t0, a6, -1
+; RV32IF-NEXT: addi a7, a6, -1
+; RV32IF-NEXT: neg t0, a6
 ; RV32IF-NEXT: bnez a6, .LBB45_6
 ; RV32IF-NEXT: # %bb.5: # %entry
 ; RV32IF-NEXT: mv a1, a5
 ; RV32IF-NEXT: .LBB45_6: # %entry
-; RV32IF-NEXT: or a4, t0, a4
-; RV32IF-NEXT: and a5, a7, a0
-; RV32IF-NEXT: and a2, a7, a2
+; RV32IF-NEXT: or a4, a7, a4
+; RV32IF-NEXT: and a2, t0, a2
+; RV32IF-NEXT: and a5, t0, a0
 ; RV32IF-NEXT: beq a1, a3, .LBB45_8
 ; RV32IF-NEXT: # %bb.7: # %entry
 ; RV32IF-NEXT: sltu a0, a3, a1
@@ -2926,11 +2936,11 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT: .LBB45_8:
 ; RV32IF-NEXT: snez a0, a4
 ; RV32IF-NEXT: .LBB45_9: # %entry
-; RV32IF-NEXT: and a2, a2, a5
+; RV32IF-NEXT: and a5, a5, a2
 ; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a2, a3, .LBB45_11
+; RV32IF-NEXT: beq a5, a3, .LBB45_11
 ; RV32IF-NEXT: # %bb.10: # %entry
-; RV32IF-NEXT: slti a0, a5, 0
+; RV32IF-NEXT: slti a0, a2, 0
 ; RV32IF-NEXT: xori a0, a0, 1
 ; RV32IF-NEXT: .LBB45_11: # %entry
 ; RV32IF-NEXT: bnez a0, .LBB45_13
@@ -2990,8 +3000,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT: .cfi_offset ra, -4
 ; RV32IFD-NEXT: addi a0, sp, 8
 ; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 20(sp)
-; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a2, 20(sp)
 ; RV32IFD-NEXT: lw a1, 12(sp)
 ; RV32IFD-NEXT: lw a4, 8(sp)
 ; RV32IFD-NEXT: lui a3, 524288
@@ -2999,25 +3009,25 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT: beq a1, a5, .LBB45_2
 ; RV32IFD-NEXT: # %bb.1: # %entry
 ; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
 ; RV32IFD-NEXT: bnez a7, .LBB45_3
 ; RV32IFD-NEXT: j .LBB45_4
 ; RV32IFD-NEXT: .LBB45_2:
 ; RV32IFD-NEXT: sltiu a6, a4, -1
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
 ; RV32IFD-NEXT: beqz a7, .LBB45_4
 ; RV32IFD-NEXT: .LBB45_3: # %entry
-; RV32IFD-NEXT: slti a6, a0, 0
+; RV32IFD-NEXT: slti a6, a2, 0
 ; RV32IFD-NEXT: .LBB45_4: # %entry
-; RV32IFD-NEXT: neg a7, a6
-; RV32IFD-NEXT: addi t0, a6, -1
+; RV32IFD-NEXT: addi a7, a6, -1
+; RV32IFD-NEXT: neg t0, a6
 ; RV32IFD-NEXT: bnez a6, .LBB45_6
 ; RV32IFD-NEXT: # %bb.5: # %entry
 ; RV32IFD-NEXT: mv a1, a5
 ; RV32IFD-NEXT: .LBB45_6: # %entry
-; RV32IFD-NEXT: or a4, t0, a4
-; RV32IFD-NEXT: and a5, a7, a0
-; RV32IFD-NEXT: and a2, a7, a2
+; RV32IFD-NEXT: or a4, a7, a4
+; RV32IFD-NEXT: and a2, t0, a2
+; RV32IFD-NEXT: and a5, t0, a0
 ; RV32IFD-NEXT: beq a1, a3, .LBB45_8
 ; RV32IFD-NEXT: # %bb.7: # %entry
 ; RV32IFD-NEXT: sltu a0, a3, a1
@@ -3025,11 +3035,11 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT: .LBB45_8:
 ; RV32IFD-NEXT: snez a0, a4
 ; RV32IFD-NEXT: .LBB45_9: # %entry
-; RV32IFD-NEXT: and a2, a2, a5
+; RV32IFD-NEXT: and a5, a5, a2
 ; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a2, a3, .LBB45_11
+; RV32IFD-NEXT: beq a5, a3, .LBB45_11
 ; RV32IFD-NEXT: # %bb.10: # %entry
-; RV32IFD-NEXT: slti a0, a5, 0
+; RV32IFD-NEXT: slti a0, a2, 0
 ; RV32IFD-NEXT: xori a0, a0, 1
 ; RV32IFD-NEXT: .LBB45_11: # %entry
 ; RV32IFD-NEXT: bnez a0, .LBB45_13
@@ -3073,8 +3083,10 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT: lw a1, 20(sp)
 ; RV32IF-NEXT: lw a2, 12(sp)
 ; RV32IF-NEXT: lw a3, 8(sp)
-; RV32IF-NEXT: or a4, a1, a0
-; RV32IF-NEXT: seqz a4, a4
+; RV32IF-NEXT: seqz a4, a0
+; RV32IF-NEXT: snez a5, a1
+; RV32IF-NEXT: addi a5, a5, -1
+; RV32IF-NEXT: and a4, a5, a4
 ; RV32IF-NEXT: xori a0, a0, 1
 ; RV32IF-NEXT: or a0, a0, a1
 ; RV32IF-NEXT: seqz a0, a0
@@ -3113,8 +3125,10 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT: lw a1, 20(sp)
 ; RV32IFD-NEXT: lw a2, 12(sp)
 ; RV32IFD-NEXT: lw a3, 8(sp)
-; RV32IFD-NEXT: or a4, a1, a0
-; RV32IFD-NEXT: seqz a4, a4
+; RV32IFD-NEXT: seqz a4, a0
+; RV32IFD-NEXT: snez a5, a1
+; RV32IFD-NEXT: addi a5, a5, -1
+; RV32IFD-NEXT: and a4, a5, a4
 ; RV32IFD-NEXT: xori a0, a0, 1
 ; RV32IFD-NEXT: or a0, a0, a1
 ; RV32IFD-NEXT: seqz a0, a0
@@ -3144,30 +3158,30 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IF-NEXT: mv a1, a0
 ; RV32IF-NEXT: addi a0, sp, 8
 ; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 8(sp)
-; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: lw a1, 8(sp)
+; RV32IF-NEXT: lw a2, 12(sp)
 ; RV32IF-NEXT: lw a3, 16(sp)
-; RV32IF-NEXT: beqz a2, .LBB47_2
+; RV32IF-NEXT: beqz a0, .LBB47_2
 ; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: slti a4, a2, 0
+; RV32IF-NEXT: slti a4, a0, 0
 ; RV32IF-NEXT: j .LBB47_3
 ; RV32IF-NEXT: .LBB47_2:
 ; RV32IF-NEXT: seqz a4, a3
 ; RV32IF-NEXT: .LBB47_3: # %entry
 ; RV32IF-NEXT: xori a3, a3, 1
-; RV32IF-NEXT: or a3, a3, a2
+; RV32IF-NEXT: or a3, a3, a0
 ; RV32IF-NEXT: seqz a3, a3
 ; RV32IF-NEXT: addi a3, a3, -1
 ; RV32IF-NEXT: and a3, a3, a4
 ; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: and a2, a3, a2
 ; RV32IF-NEXT: and a1, a3, a1
 ; RV32IF-NEXT: and a0, a3, a0
-; RV32IF-NEXT: and a2, a3, a2
-; RV32IF-NEXT: slti a2, a2, 0
-; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: slti a0, a0, 0
+; RV32IF-NEXT: addi a3, a0, -1
+; RV32IF-NEXT: and a0, a3, a1
+; RV32IF-NEXT: and a1, a3, a2
 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 32
 ; RV32IF-NEXT: ret
@@ -3202,30 +3216,30 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT: .cfi_offset ra, -4
 ; RV32IFD-NEXT: addi a0, sp, 8
 ; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: lw a1, 8(sp)
+; RV32IFD-NEXT: lw a2, 12(sp)
 ; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: beqz a2, .LBB47_2
+; RV32IFD-NEXT: beqz a0, .LBB47_2
 ; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: slti a4, a2, 0
+; RV32IFD-NEXT: slti a4, a0, 0
 ; RV32IFD-NEXT: j .LBB47_3
 ; RV32IFD-NEXT: .LBB47_2:
 ; RV32IFD-NEXT: seqz a4, a3
 ; RV32IFD-NEXT: .LBB47_3: # %entry
 ; RV32IFD-NEXT: xori a3, a3, 1
-; RV32IFD-NEXT: or a3, a3, a2
+; RV32IFD-NEXT: or a3, a3, a0
 ; RV32IFD-NEXT: seqz a3, a3
 ; RV32IFD-NEXT: addi a3, a3, -1
 ; RV32IFD-NEXT: and a3, a3, a4
 ; RV32IFD-NEXT: neg a3, a3
+; RV32IFD-NEXT: and a2, a3, a2
 ; RV32IFD-NEXT: and a1, a3, a1
 ; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: and a2, a3, a2
-; RV32IFD-NEXT: slti a2, a2, 0
-; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: and a0, a2, a0
-; RV32IFD-NEXT: and a1, a2, a1
+; RV32IFD-NEXT: slti a0, a0, 0
+; RV32IFD-NEXT: addi a3, a0, -1
+; RV32IFD-NEXT: and a0, a3, a1
+; RV32IFD-NEXT: and a1, a3, a2
 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT: addi sp, sp, 32
 ; RV32IFD-NEXT: ret
@@ -3246,8 +3260,8 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT: .cfi_offset ra, -4
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
 ; RV32-NEXT: lw a1, 12(sp)
 ; RV32-NEXT: lw a4, 8(sp)
 ; RV32-NEXT: lui a3, 524288
@@ -3255,25 +3269,25 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT: beq a1, a5, .LBB48_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: bnez a7, .LBB48_3
 ; RV32-NEXT: j .LBB48_4
 ; RV32-NEXT: .LBB48_2:
 ; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: beqz a7, .LBB48_4
 ; RV32-NEXT: .LBB48_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
 ; RV32-NEXT: .LBB48_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
 ; RV32-NEXT: bnez a6, .LBB48_6
 ; RV32-NEXT: # %bb.5: # %entry
 ; RV32-NEXT: mv a1, a5
 ; RV32-NEXT: .LBB48_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
 ; RV32-NEXT: beq a1, a3, .LBB48_8
 ; RV32-NEXT: # %bb.7: # %entry
 ; RV32-NEXT: sltu a0, a3, a1
@@ -3281,11 +3295,11 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT: .LBB48_8:
 ; RV32-NEXT: snez a0, a4
 ; RV32-NEXT: .LBB48_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
 ; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB48_11
+; RV32-NEXT: beq a5, a3, .LBB48_11
 ; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: .LBB48_11: # %entry
 ; RV32-NEXT: bnez a0, .LBB48_13
@@ -3327,8 +3341,10 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT: lw a1, 20(sp)
 ; RV32-NEXT: lw a2, 12(sp)
 ; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: seqz a0, a0
@@ -3370,30 +3386,30 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV32-NEXT: .cfi_offset ra, -4
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
 ; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB50_2
+; RV32-NEXT: beqz a0, .LBB50_2
 ; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
 ; RV32-NEXT: j .LBB50_3
 ; RV32-NEXT: .LBB50_2:
 ; RV32-NEXT: seqz a4, a3
 ; RV32-NEXT: .LBB50_3: # %entry
 ; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
 ; RV32-NEXT: seqz a3, a3
 ; RV32-NEXT: addi a3, a3, -1
 ; RV32-NEXT: and a3, a3, a4
 ; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
 ; RV32-NEXT: and a1, a3, a1
 ; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
@@ -3437,8 +3453,8 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT: call __extendhfsf2
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
 ; RV32-NEXT: lw a1, 12(sp)
 ; RV32-NEXT: lw a4, 8(sp)
 ; RV32-NEXT: lui a3, 524288
@@ -3446,25 +3462,25 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT: beq a1, a5, .LBB51_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: bnez a7, .LBB51_3
 ; RV32-NEXT: j .LBB51_4
 ; RV32-NEXT: .LBB51_2:
 ; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
 ; RV32-NEXT: beqz a7, .LBB51_4
 ; RV32-NEXT: .LBB51_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
 ; RV32-NEXT: .LBB51_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
 ; RV32-NEXT: bnez a6, .LBB51_6
 ; RV32-NEXT: # %bb.5: # %entry
 ; RV32-NEXT: mv a1, a5
 ; RV32-NEXT: .LBB51_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
 ; RV32-NEXT: beq a1, a3, .LBB51_8
 ; RV32-NEXT: # %bb.7: # %entry
 ; RV32-NEXT: sltu a0, a3, a1
@@ -3472,11 +3488,11 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT: .LBB51_8:
 ; RV32-NEXT: snez a0, a4
 ; RV32-NEXT: .LBB51_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
 ; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB51_11
+; RV32-NEXT: beq a5, a3, .LBB51_11
 ; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: .LBB51_11: # %entry
 ; RV32-NEXT: bnez a0, .LBB51_13
@@ -3550,8 +3566,10 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; RV32-NEXT: lw a1, 20(sp)
 ; RV32-NEXT: lw a2, 12(sp)
 ; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: seqz a0, a0
@@ -3595,30 +3613,30 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; RV32-NEXT: call __extendhfsf2
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
 ; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB53_2
+; RV32-NEXT: beqz a0, .LBB53_2
 ; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
 ; RV32-NEXT: j .LBB53_3
 ; RV32-NEXT: .LBB53_2:
 ; RV32-NEXT: seqz a4, a3
 ; RV32-NEXT: .LBB53_3: # %entry
 ; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
 ; RV32-NEXT: seqz a3, a3
 ; RV32-NEXT: addi a3, a3, -1
 ; RV32-NEXT: and a3, a3, a4
 ; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
 ; RV32-NEXT: and a1, a3, a1
 ; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index daaceed3941c53..518cd7da2ab771 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2145,41 +2145,48 @@ define i64 @fcvt_l_h(half %a) nounwind {
 define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZFH-LABEL: fcvt_l_h_sat:
 ; RV32IZFH: # %bb.0: # %start
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: addi sp, sp, -32
+; RV32IZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
+; RV32IZFH-NEXT: flt.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
 ; RV32IZFH-NEXT: lui a0, 913408
 ; RV32IZFH-NEXT: fmv.w.x fa5, a0
-; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: fle.s s2, fa5, fs0
+; RV32IZFH-NEXT: neg s3, s2
 ; RV32IZFH-NEXT: fmv.s fa0, fs0
 ; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: and a0, s3, a0
+; RV32IZFH-NEXT: or a0, s1, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
 ; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB10_2
+; RV32IZFH-NEXT: li a5, 1
+; RV32IZFH-NEXT: lui a3, 524288
+; RV32IZFH-NEXT: bne s2, a5, .LBB10_2
 ; RV32IZFH-NEXT: # %bb.1: # %start
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a3, a1
 ; RV32IZFH-NEXT: .LBB10_2: # %start
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB10_4
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: beqz s0, .LBB10_4
 ; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: addi a2, a4, -1
+; RV32IZFH-NEXT: addi a3, a4, -1
 ; RV32IZFH-NEXT: .LBB10_4: # %start
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
-; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: and a1, a2, a3
+; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 32
 ; RV32IZFH-NEXT: ret
 ;
 ; RV64IZFH-LABEL: fcvt_l_h_sat:
@@ -2193,41 +2200,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; RV32IDZFH-LABEL: fcvt_l_h_sat:
 ; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: addi sp, sp, -16
-; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: addi sp, sp, -32
+; RV32IDZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IDZFH-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
 ; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0
+; RV32IDZFH-NEXT: flt.s s0, fa5, fs0
+; RV32IDZFH-NEXT: neg s1, s0
 ; RV32IDZFH-NEXT: lui a0, 913408
 ; RV32IDZFH-NEXT: fmv.w.x fa5, a0
-; RV32IDZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IDZFH-NEXT: fle.s s2, fa5, fs0
+; RV32IDZFH-NEXT: neg s3, s2
 ; RV32IDZFH-NEXT: fmv.s fa0, fs0
 ; RV32IDZFH-NEXT: call __fixsfdi
+; RV32IDZFH-NEXT: and a0, s3, a0
+; RV32IDZFH-NEXT: or a0, s1, a0
+; RV32IDZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IDZFH-NEXT: neg a2, a2
 ; RV32IDZFH-NEXT: lui a4, 524288
-; RV32IDZFH-NEXT: lui a2, 524288
-; RV32IDZFH-NEXT: beqz s0, .LBB10_2
+; RV32IDZFH-NEXT: li a5, 1
+; RV32IDZFH-NEXT: lui a3, 524288
+; RV32IDZFH-NEXT: bne s2, a5, .LBB10_2
 ; RV32IDZFH-NEXT: # %bb.1: # %start
-; RV32IDZFH-NEXT: mv a2, a1
+; RV32IDZFH-NEXT: mv a3, a1
 ; RV32IDZFH-NEXT: .LBB10_2: # %start
-; RV32IDZFH-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IDZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IDZFH-NEXT: beqz a3, .LBB10_4
+; RV32IDZFH-NEXT: and a0, a2, a0
+; RV32IDZFH-NEXT: beqz s0, .LBB10_4
 ; RV32IDZFH-NEXT: # %bb.3:
-; RV32IDZFH-NEXT: addi a2, a4, -1
+; RV32IDZFH-NEXT: addi a3, a4, -1
 ; RV32IDZFH-NEXT: .LBB10_4: # %start
-; RV32IDZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IDZFH-NEXT: neg a4, a1
-; RV32IDZFH-NEXT: and a1, a4, a2
-; RV32IDZFH-NEXT: neg a2, a3
-; RV32IDZFH-NEXT: neg a3, s0
-; RV32IDZFH-NEXT: and a0, a3, a0
-; RV32IDZFH-NEXT: or a0, a2, a0
-; RV32IDZFH-NEXT: and a0, a4, a0
-; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: and a1, a2, a3
+; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: addi sp, sp, 32
 ; RV32IDZFH-NEXT: ret
 ;
 ; RV64IDZFH-LABEL: fcvt_l_h_sat:
@@ -2263,8 +2277,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZHINX-NEXT: feq.s a2, s0, s0
 ; RV32IZHINX-NEXT: neg a2, a2
 ; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: li a5, 1
 ; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s3, .LBB10_2
+; RV32IZHINX-NEXT: bne s3, a5, .LBB10_2
 ; RV32IZHINX-NEXT: # %bb.1: # %start
 ; RV32IZHINX-NEXT: mv a3, a1
 ; RV32IZHINX-NEXT: .LBB10_2: # %start
@@ -2316,8 +2331,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZDINXZHINX-NEXT: feq.s a2, s0, s0
 ; RV32IZDINXZHINX-NEXT: neg a2, a2
 ; RV32IZDINXZHINX-NEXT: lui a4, 524288
+; RV32IZDINXZHINX-NEXT: li a5, 1
 ; RV32IZDINXZHINX-NEXT: lui a3, 524288
-; RV32IZDINXZHINX-NEXT: beqz s3, .LBB10_2
+; RV32IZDINXZHINX-NEXT: bne s3, a5, .LBB10_2
 ; RV32IZDINXZHINX-NEXT: # %bb.1: # %start
 ; RV32IZDINXZHINX-NEXT: mv a3, a1
 ; RV32IZDINXZHINX-NEXT: .LBB10_2: # %start
@@ -2448,42 +2464,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; RV32ID-ILP32-LABEL: fcvt_l_h_sat:
 ; RV32ID-ILP32: # %bb.0: # %start
-; RV32ID-ILP32-NEXT: addi sp, sp, -16
-; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: addi sp, sp, -32
+; RV32ID-ILP32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32ID-ILP32-NEXT: call __extendhfsf2
+; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0)
+; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
 ; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
+; RV32ID-ILP32-NEXT: fsw fa4, 8(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: flt.s s0, fa5, fa4
+; RV32ID-ILP32-NEXT: neg s1, s0
 ; RV32ID-ILP32-NEXT: lui a1, 913408
 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
-; RV32ID-ILP32-NEXT: fsw fa4, 4(sp) # 4-byte Folded Spill
-; RV32ID-ILP32-NEXT: fle.s s0, fa5, fa4
+; RV32ID-ILP32-NEXT: fle.s s2, fa5, fa4
+; RV32ID-ILP32-NEXT: neg s3, s2
 ; RV32ID-ILP32-NEXT: call __fixsfdi
+; RV32ID-ILP32-NEXT: and a0, s3, a0
+; RV32ID-ILP32-NEXT: or a0, s1, a0
+; RV32ID-ILP32-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: feq.s a2, fa5, fa5
+; RV32ID-ILP32-NEXT: neg a2, a2
 ; RV32ID-ILP32-NEXT: lui a4, 524288
-; RV32ID-ILP32-NEXT: lui a2, 524288
-; RV32ID-ILP32-NEXT: beqz s0, .LBB10_2
+; RV32ID-ILP32-NEXT: li a5, 1
+; RV32ID-ILP32-NEXT: lui a3, 524288
+; RV32ID-ILP32-NEXT: bne s2, a5, .LBB10_2
 ; RV32ID-ILP32-NEXT: # %bb.1: # %start
-; RV32ID-ILP32-NEXT: mv a2, a1
+; RV32ID-ILP32-NEXT: mv a3, a1
 ; RV32ID-ILP32-NEXT: .LBB10_2: # %start
-; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32ID-ILP32-NEXT: flw fa4, 4(sp) # 4-byte Folded Reload
-; RV32ID-ILP32-NEXT: flt.s a3, fa5, fa4
-; RV32ID-ILP32-NEXT: fmv.s fa5, fa4
-; RV32ID-ILP32-NEXT: beqz a3, .LBB10_4
+; RV32ID-ILP32-NEXT: and a0, a2, a0
+; RV32ID-ILP32-NEXT: beqz s0, .LBB10_4
 ; RV32ID-ILP32-NEXT: # %bb.3:
-; RV32ID-ILP32-NEXT: addi a2, a4, -1
+; RV32ID-ILP32-NEXT: addi a3, a4, -1
 ; RV32ID-ILP32-NEXT: .LBB10_4: # %start
-; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5
-; RV32ID-ILP32-NEXT: neg a4, a1
-; RV32ID-ILP32-NEXT: and a1, a4, a2
-; RV32ID-ILP32-NEXT: neg a2, a3
-; RV32ID-ILP32-NEXT: neg a3, s0
-; RV32ID-ILP32-NEXT: and a0, a3, a0
-; RV32ID-ILP32-NEXT: or a0, a2, a0
-; RV32ID-ILP32-NEXT: and a0, a4, a0
-; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32ID-ILP32-NEXT: addi sp, sp, 16
+; RV32ID-ILP32-NEXT: and a1, a2, a3
+; RV32ID-ILP32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: addi sp, sp, 32
 ; RV32ID-ILP32-NEXT: ret
 ;
 ; RV64ID-LP64-LABEL: fcvt_l_h_sat:
@@ -2503,41 +2525,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; RV32ID-LABEL: fcvt_l_h_sat:
 ; RV32ID: # %bb.0: # %start
-; RV32ID-NEXT: addi sp, sp, -16
-; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: addi sp, sp, -32
+; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
 ; RV32ID-NEXT: call __extendhfsf2
+; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
 ; RV32ID-NEXT: fmv.s fs0, fa0
+; RV32ID-NEXT: flt.s s0, fa5, fa0
+; RV32ID-NEXT: neg s1, s0
 ; RV32ID-NEXT: lui a0, 913408
 ; RV32ID-NEXT: fmv.w.x fa5, a0
-; RV32ID-NEXT: fle.s s0, fa5, fa0
+; RV32ID-NEXT: fle.s s2, fa5, fa0
+; RV32ID-NEXT: neg s3, s2
 ; RV32ID-NEXT: call __fixsfdi
+; RV32ID-NEXT: and a0, s3, a0
+; RV32ID-NEXT: or a0, s1, a0
+; RV32ID-NEXT: feq.s a2, fs0, fs0
+; RV32ID-NEXT: neg a2, a2
 ; RV32ID-NEXT: lui a4, 524288
-; RV32ID-NEXT: lui a2, 524288
-; RV32ID-NEXT: beqz s0, .LBB10_2
+; RV32ID-NEXT: li a5, 1
+; RV32ID-NEXT: lui a3, 524288
+; RV32ID-NEXT: bne s2, a5, .LBB10_2
 ; RV32ID-NEXT: # %bb.1: # %start
-; RV32ID-NEXT: mv a2, a1
+; RV32ID-NEXT: mv a3, a1
 ; RV32ID-NEXT: .LBB10_2: # %start
-; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32ID-NEXT: flt.s a3, fa5, fs0
-; RV32ID-NEXT: beqz a3, .LBB10_4
+; RV32ID-NEXT: and a0, a2, a0
+; RV32ID-NEXT: beqz s0, .LBB10_4
 ; RV32ID-NEXT: # %bb.3:
-; RV32ID-NEXT: addi a2, a4, -1
+; RV32ID-NEXT: addi a3, a4, -1
 ; RV32ID-NEXT: .LBB10_4: # %start
-; RV32ID-NEXT: feq.s a1, fs0, fs0
-; RV32ID-NEXT: neg a4, a1
-; RV32ID-NEXT: and a1, a4, a2
-; RV32ID-NEXT: neg a2, a3
-; RV32ID-NEXT: neg a3, s0
-; RV32ID-NEXT: and a0, a3, a0
-; RV32ID-NEXT: or a0, a2, a0
-; RV32ID-NEXT: and a0, a4, a0
-; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: and a1, a2, a3
+; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32ID-NEXT: addi sp, sp, 16
+; RV32ID-NEXT: addi sp, sp, 32
 ; RV32ID-NEXT: ret
 ;
 ; RV64ID-LABEL: fcvt_l_h_sat:
@@ -2556,41 +2585,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; RV32IFZFHMIN-LABEL: fcvt_l_h_sat:
 ; RV32IFZFHMIN: # %bb.0: # %start
-; RV32IFZFHMIN-NEXT: addi sp, sp, -16
-; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: addi sp, sp, -32
+; RV32IFZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
 ; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0
+; RV32IFZFHMIN-NEXT: flt.s s0, fa5, fs0
+; RV32IFZFHMIN-NEXT: neg s1, s0
 ; RV32IFZFHMIN-NEXT: lui a0, 913408
 ; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a0
-; RV32IFZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IFZFHMIN-NEXT: fle.s s2, fa5, fs0
+; RV32IFZFHMIN-NEXT: neg s3, s2
 ; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0
 ; RV32IFZFHMIN-NEXT: call __fixsfdi
+; RV32IFZFHMIN-NEXT: and a0, s3, a0
+; RV32IFZFHMIN-NEXT: or a0, s1, a0
+; RV32IFZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IFZFHMIN-NEXT: neg a2, a2
 ; RV32IFZFHMIN-NEXT: lui a4, 524288
-; RV32IFZFHMIN-NEXT: lui a2, 524288
-; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_2
+; RV32IFZFHMIN-NEXT: li a5, 1
+; RV32IFZFHMIN-NEXT: lui a3, 524288
+; RV32IFZFHMIN-NEXT: bne s2, a5, .LBB10_2
 ; RV32IFZFHMIN-NEXT: # %bb.1: # %start
-; RV32IFZFHMIN-NEXT: mv a2, a1
+; RV32IFZFHMIN-NEXT: mv a3, a1
 ; RV32IFZFHMIN-NEXT: .LBB10_2: # %start
-; RV32IFZFHMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IFZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IFZFHMIN-NEXT: beqz a3, .LBB10_4
+; RV32IFZFHMIN-NEXT: and a0, a2, a0
+; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_4
 ; RV32IFZFHMIN-NEXT: # %bb.3:
-; RV32IFZFHMIN-NEXT: addi a2, a4, -1
+; RV32IFZFHMIN-NEXT: addi a3, a4, -1
 ; RV32IFZFHMIN-NEXT: .LBB10_4: # %start
-; RV32IFZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IFZFHMIN-NEXT: neg a4, a1
-; RV32IFZFHMIN-NEXT: and a1, a4, a2
-; RV32IFZFHMIN-NEXT: neg a2, a3
-; RV32IFZFHMIN-NEXT: neg a3, s0
-; RV32IFZFHMIN-NEXT: and a0, a3, a0
-; RV32IFZFHMIN-NEXT: or a0, a2, a0
-; RV32IFZFHMIN-NEXT: and a0, a4, a0
-; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IFZFHMIN-NEXT: addi sp, sp, 16
+; RV32IFZFHMIN-NEXT: and a1, a2, a3
+; RV32IFZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: addi sp, sp, 32
 ; RV32IFZFHMIN-NEXT: ret
 ;
 ; CHECK64-IZFHMIN-LABEL: fcvt_l_h_sat:
@@ -2605,41 +2641,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; RV32IDZFHMIN-LABEL: fcvt_l_h_sat:
 ; RV32IDZFHMIN: # %bb.0: # %start
-; RV32IDZFHMIN-NEXT: addi sp, sp, -16
-; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: addi sp, sp, -32
+; RV32IDZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IDZFHMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
 ; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0
+; RV32IDZFHMIN-NEXT: flt.s s0, fa5, fs0
+; RV32IDZFHMIN-NEXT: neg s1, s0
 ; RV32IDZFHMIN-NEXT: lui a0, 913408
 ; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a0
-; RV32IDZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IDZFHMIN-NEXT: fle.s s2, fa5, fs0
+; RV32IDZFHMIN-NEXT: neg s3, s2
 ; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0
 ; RV32IDZFHMIN-NEXT: call __fixsfdi
+; RV32IDZFHMIN-NEXT: and a0, s3, a0
+; RV32IDZFHMIN-NEXT: or a0, s1, a0
+; RV32IDZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IDZFHMIN-NEXT: neg a2, a2
 ; RV32IDZFHMIN-NEXT: lui a4, 524288
-; RV32IDZFHMIN-NEXT: lui a2, 524288
-; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_2
+; RV32IDZFHMIN-NEXT: li a5, 1
+; RV32IDZFHMIN-NEXT: lui a3, 524288
+; RV32IDZFHMIN-NEXT: bne s2, a5, .LBB10_2
 ; RV32IDZFHMIN-NEXT: # %bb.1: # %start
-; RV32IDZFHMIN-NEXT: mv a2, a1
+; RV32IDZFHMIN-NEXT: mv a3, a1
 ; RV32IDZFHMIN-NEXT: .LBB10_2: # %start
-; RV32IDZFHMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IDZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IDZFHMIN-NEXT: beqz a3, .LBB10_4
+; RV32IDZFHMIN-NEXT: and a0, a2, a0
+; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_4
 ; RV32IDZFHMIN-NEXT: # %bb.3:
-; RV32IDZFHMIN-NEXT: addi a2, a4, -1
+; RV32IDZFHMIN-NEXT: addi a3, a4, -1
 ; RV32IDZFHMIN-NEXT: .LBB10_4: # %start
-; RV32IDZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IDZFHMIN-NEXT: neg a4, a1
-; RV32IDZFHMIN-NEXT: and a1, a4, a2
-; RV32IDZFHMIN-NEXT: neg a2, a3
-; RV32IDZFHMIN-NEXT: neg a3, s0
-; RV32IDZFHMIN-NEXT: and a0, a3, a0
-; RV32IDZFHMIN-NEXT: or a0, a2, a0
-; RV32IDZFHMIN-NEXT: and a0, a4, a0
-; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: and a1, a2, a3
+; RV32IDZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IDZFHMIN-NEXT: addi sp, sp, 16
+; RV32IDZFHMIN-NEXT: addi sp, sp, 32
 ; RV32IDZFHMIN-NEXT: ret
 ;
 ; CHECK32-IZHINXMIN-LABEL: fcvt_l_h_sat:
@@ -2666,8 +2709,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; CHECK32-IZHINXMIN-NEXT: feq.s a2, s0, s0
 ; CHECK32-IZHINXMIN-NEXT: neg a2, a2
 ; CHECK32-IZHINXMIN-NEXT: lui a4, 524288
+; CHECK32-IZHINXMIN-NEXT: li a5, 1
 ; CHECK32-IZHINXMIN-NEXT: lui a3, 524288
-; CHECK32-IZHINXMIN-NEXT: beqz s3, .LBB10_2
+; CHECK32-IZHINXMIN-NEXT: bne s3, a5, .LBB10_2
 ; CHECK32-IZHINXMIN-NEXT: # %bb.1: # %start
 ; CHECK32-IZHINXMIN-NEXT: mv a3, a1
 ; CHECK32-IZHINXMIN-NEXT: .LBB10_2: # %start
@@ -2720,8 +2764,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; CHECK32-IZDINXZHINXMIN-NEXT: feq.s a2, s0, s0
 ; CHECK32-IZDINXZHINXMIN-NEXT: neg a2, a2
 ; CHECK32-IZDINXZHINXMIN-NEXT: lui a4, 524288
+; CHECK32-IZDINXZHINXMIN-NEXT: li a5, 1
 ; CHECK32-IZDINXZHINXMIN-NEXT: lui a3, 524288
-; CHECK32-IZDINXZHINXMIN-NEXT: beqz s3, .LBB10_2
+; CHECK32-IZDINXZHINXMIN-NEXT: bne s3, a5, .LBB10_2
 ; CHECK32-IZDINXZHINXMIN-NEXT: # %bb.1: # %start
 ; CHECK32-IZDINXZHINXMIN-NEXT: mv a3, a1
 ; CHECK32-IZDINXZHINXMIN-NEXT: .LBB10_2: # %start
@@ -2939,7 +2984,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT: neg s0, a0
 ; RV32IZFH-NEXT: fmv.w.x fa5, zero
 ; RV32IZFH-NEXT: fle.s a0, fa5, fa0
-; RV32IZFH-NEXT: neg s1, a0
+; RV32IZFH-NEXT: xori a0, a0, 1
+; RV32IZFH-NEXT: addi s1, a0, -1
 ; RV32IZFH-NEXT: call __fixunssfdi
 ; RV32IZFH-NEXT: and a0, s1, a0
 ; RV32IZFH-NEXT: or a0, s0, a0
@@ -2973,7 +3019,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT: neg s0, a0
 ; RV32IDZFH-NEXT: fmv.w.x fa5, zero
 ; RV32IDZFH-NEXT: fle.s a0, fa5, fa0
-; RV32IDZFH-NEXT: neg s1, a0
+; RV32IDZFH-NEXT: xori a0, a0, 1
+; RV32IDZFH-NEXT: addi s1, a0, -1
 ; RV32IDZFH-NEXT: call __fixunssfdi
 ; RV32IDZFH-NEXT: and a0, s1, a0
 ; RV32IDZFH-NEXT: or a0, s0, a0
@@ -3006,7 +3053,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IZHINX-NEXT: flt.s a1, a1, a0
 ; RV32IZHINX-NEXT: neg s0, a1
 ; RV32IZHINX-NEXT: fle.s a1, zero, a0
-; RV32IZHINX-NEXT: neg s1, a1
+; RV32IZHINX-NEXT: xori a1, a1, 1
+; RV32IZHINX-NEXT: addi s1, a1, -1
 ; RV32IZHINX-NEXT: call __fixunssfdi
 ; RV32IZHINX-NEXT: and a0, s1, a0
 ; RV32IZHINX-NEXT: or a0, s0, a0
@@ -3039,7 +3087,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IZDINXZHINX-NEXT: flt.s a1, a1, a0
 ; RV32IZDINXZHINX-NEXT: neg s0, a1
 ; RV32IZDINXZHINX-NEXT: fle.s a1, zero, a0
-; RV32IZDINXZHINX-NEXT: neg s1, a1
+; RV32IZDINXZHINX-NEXT: xori a1, a1, 1
+; RV32IZDINXZHINX-NEXT: addi s1, a1, -1
 ; RV32IZDINXZHINX-NEXT: call __fixunssfdi
 ; RV32IZDINXZHINX-NEXT: and a0, s1, a0
 ; RV32IZDINXZHINX-NEXT: or a0, s0, a0
@@ -3138,7 +3187,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32ID-ILP32-NEXT: neg s0, a1
 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, zero
 ; RV32ID-ILP32-NEXT: fle.s a1, fa5, fa4
-; RV32ID-ILP32-NEXT: neg s1, a1
+; RV32ID-ILP32-NEXT: xori a1, a1, 1
+; RV32ID-ILP32-NEXT: addi s1, a1, -1
 ; RV32ID-ILP32-NEXT: call __fixunssfdi
 ; RV32ID-ILP32-NEXT: and a0, s1, a0
 ; RV32ID-ILP32-NEXT: or a0, s0, a0
@@ -3178,7 +3228,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32ID-NEXT: neg s0, a0
 ; RV32ID-NEXT: fmv.w.x fa5, zero
 ; RV32ID-NEXT: fle.s a0, fa5, fa0
-; RV32ID-NEXT: neg s1, a0
+; RV32ID-NEXT: xori a0, a0, 1
+; RV32ID-NEXT: addi s1, a0, -1
 ; RV32ID-NEXT: call __fixunssfdi
 ; RV32ID-NEXT: and a0, s1, a0
 ; RV32ID-NEXT: or a0, s0, a0
@@ -3217,7 +3268,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; CHECK32-IZFHMIN-NEXT: neg s0, a0
 ; CHECK32-IZFHMIN-NEXT: fmv.w.x fa5, zero
 ; CHECK32-IZFHMIN-NEXT: fle.s a0, fa5, fa0
-; CHECK32-IZFHMIN-NEXT: neg s1, a0
+; CHECK32-IZFHMIN-NEXT: xori a0, a0, 1
+; CHECK32-IZFHMIN-NEXT: addi s1, a0, -1
 ; CHECK32-IZFHMIN-NEXT: call __fixunssfdi
 ; CHECK32-IZFHMIN-NEXT: and a0, s1, a0
 ; CHECK32-IZFHMIN-NEXT: or a0, s0, a0
@@ -3251,7 +3303,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; CHECK32-IZHINXMIN-NEXT: flt.s a1, a1, a0
 ; CHECK32-IZHINXMIN-NEXT: neg s0, a1
 ; CHECK32-IZHINXMIN-NEXT: fle.s a1, zero, a0
-; CHECK32-IZHINXMIN-NEXT: neg s1, a1
+; CHECK32-IZHINXMIN-NEXT: xori a1, a1, 1
+; CHECK32-IZHINXMIN-NEXT: addi s1, a1, -1
 ; CHECK32-IZHINXMIN-NEXT: call __fixunssfdi
 ; CHECK32-IZHINXMIN-NEXT: and a0, s1, a0
 ; CHECK32-IZHINXMIN-NEXT: or a0, s0, a0
@@ -3285,7 +3338,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a1, a1, a0
 ; CHECK32-IZDINXZHINXMIN-NEXT: neg s0, a1
 ; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a1, zero, a0
-; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a1
+; CHECK32-IZDINXZHINXMIN-NEXT: xori a1, a1, 1
+; CHECK32-IZDINXZHINXMIN-NEXT: addi s1, a1, -1
 ; CHECK32-IZDINXZHINXMIN-NEXT: call __fixunssfdi
 ; CHECK32-IZDINXZHINXMIN-NEXT: and a0, s1, a0
 ; CHECK32-IZDINXZHINXMIN-NEXT: or a0, s0, a0
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
index 3f385909b0b510..647af5f5b87438 100644
--- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -108,38 +108,41 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFH-NEXT: addi sp, sp, -16
 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
 ; RV32IZFH-NEXT: lui a0, 913408
 ; RV32IZFH-NEXT: fmv.w.x fa5, a0
 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
 ; RV32IZFH-NEXT: fmv.s fa0, fs0
 ; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: lui a2, %hi(.LCPI1_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a2)
+; RV32IZFH-NEXT: and a0, s1, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a2, a3
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
+; RV32IZFH-NEXT: lui a5, 524288
+; RV32IZFH-NEXT: li a6, 1
 ; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB1_4
+; RV32IZFH-NEXT: bne s0, a6, .LBB1_4
 ; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a4, a1
 ; RV32IZFH-NEXT: .LBB1_4:
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB1_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: addi a2, a4, -1
-; RV32IZFH-NEXT: .LBB1_6:
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, s0
 ; RV32IZFH-NEXT: and a0, a2, a0
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: beqz a3, .LBB1_6
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: addi a4, a5, -1
+; RV32IZFH-NEXT: .LBB1_6:
+; RV32IZFH-NEXT: and a1, a2, a4
 ; RV32IZFH-NEXT: ret
 ;
 ; RV64IZFH-LABEL: test_floor_si64:
@@ -177,16 +180,17 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI1_1)
 ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI1_1)(a2)
 ; RV32IZHINX-NEXT: and a0, s2, a0
-; RV32IZHINX-NEXT: flt.s a4, a2, s0
-; RV32IZHINX-NEXT: neg a2, a4
+; RV32IZHINX-NEXT: flt.s a3, a2, s0
+; RV32IZHINX-NEXT: neg a2, a3
 ; RV32IZHINX-NEXT: or a0, a2, a0
 ; RV32IZHINX-NEXT: feq.s a2, s0, s0
 ; RV32IZHINX-NEXT: neg a2, a2
 ; RV32IZHINX-NEXT: lui a5, 524288
-; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s1, .LBB1_4
+; RV32IZHINX-NEXT: li a6, 1
+; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: bne s1, a6, .LBB1_4
 ; RV32IZHINX-NEXT: # %bb.3:
-; RV32IZHINX-NEXT: mv a3, a1
+; RV32IZHINX-NEXT: mv a4, a1
 ; RV32IZHINX-NEXT: .LBB1_4:
 ; RV32IZHINX-NEXT: and a0, a2, a0
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -194,11 +198,11 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
-; RV32IZHINX-NEXT: beqz a4, .LBB1_6
+; RV32IZHINX-NEXT: beqz a3, .LBB1_6
 ; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: addi a4, a5, -1
 ; RV32IZHINX-NEXT: .LBB1_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
+; RV32IZHINX-NEXT: and a1, a2, a4
 ; RV32IZHINX-NEXT: ret
 ;
 ; RV64IZHINX-LABEL: test_floor_si64:
@@ -236,39 +240,42 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT: addi sp, sp, -16
 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
 ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
 ; RV32IZFHMIN-NEXT: lui a0, 913408
 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0
 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFHMIN-NEXT: neg s1, s0
 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT: call __fixsfdi
+; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI1_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a2)
+; RV32IZFHMIN-NEXT: and a0, s1, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a2, a3
+; RV32IZFHMIN-NEXT: or a0, a2, a0
+; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFHMIN-NEXT: neg a2, a2
+; RV32IZFHMIN-NEXT: lui a5, 524288
+; RV32IZFHMIN-NEXT: li a6, 1
 ; RV32IZFHMIN-NEXT: lui a4, 524288
-; RV32IZFHMIN-NEXT: lui a2, 524288
-; RV32IZFHMIN-NEXT: beqz s0, .LBB1_4
+; RV32IZFHMIN-NEXT: bne s0, a6, .LBB1_4
 ; RV32IZFHMIN-NEXT: # %bb.3:
-; RV32IZFHMIN-NEXT: mv a2, a1
+; RV32IZFHMIN-NEXT: mv a4, a1
 ; RV32IZFHMIN-NEXT: .LBB1_4:
-; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a1)
-; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6
-; RV32IZFHMIN-NEXT: # %bb.5:
-; RV32IZFHMIN-NEXT: addi a2, a4, -1
-; RV32IZFHMIN-NEXT: .LBB1_6:
-; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFHMIN-NEXT: neg a4, a1
-; RV32IZFHMIN-NEXT: and a1, a4, a2
-; RV32IZFHMIN-NEXT: neg a2, s0
 ; RV32IZFHMIN-NEXT: and a0, a2, a0
-; RV32IZFHMIN-NEXT: neg a2, a3
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a0, a4, a0
 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6
+; RV32IZFHMIN-NEXT: # %bb.5:
+; RV32IZFHMIN-NEXT: addi a4, a5, -1
+; RV32IZFHMIN-NEXT: .LBB1_6:
+; RV32IZFHMIN-NEXT: and a1, a2, a4
 ; RV32IZFHMIN-NEXT: ret
 ;
 ; RV64IZFHMIN-LABEL: test_floor_si64:
@@ -320,16 +327,17 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI1_0)
 ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI1_0)(a2)
 ; RV32IZHINXMIN-NEXT: and a0, s2, a0
-; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a4
+; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0
+; RV32IZHINXMIN-NEXT: neg a2, a3
 ; RV32IZHINXMIN-NEXT: or a0, a2, a0
 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0
 ; RV32IZHINXMIN-NEXT: neg a2, a2
 ; RV32IZHINXMIN-NEXT: lui a5, 524288
-; RV32IZHINXMIN-NEXT: lui a3, 524288
-; RV32IZHINXMIN-NEXT: beqz s1, .LBB1_4
+; RV32IZHINXMIN-NEXT: li a6, 1
+; RV32IZHINXMIN-NEXT: lui a4, 524288
+; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB1_4
 ; RV32IZHINXMIN-NEXT: # %bb.3:
-; RV32IZHINXMIN-NEXT: mv a3, a1
+; RV32IZHINXMIN-NEXT: mv a4, a1
 ; RV32IZHINXMIN-NEXT: .LBB1_4:
 ; RV32IZHINXMIN-NEXT: and a0, a2, a0
 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -337,11 +345,11 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT: addi sp, sp, 16
-; RV32IZHINXMIN-NEXT: beqz a4, .LBB1_6
+; RV32IZHINXMIN-NEXT: beqz a3, .LBB1_6
 ; RV32IZHINXMIN-NEXT: # %bb.5:
-; RV32IZHINXMIN-NEXT: addi a3, a5, -1
+; RV32IZHINXMIN-NEXT: addi a4, a5, -1
 ; RV32IZHINXMIN-NEXT: .LBB1_6:
-; RV32IZHINXMIN-NEXT: and a1, a2, a3
+; RV32IZHINXMIN-NEXT: and a1, a2, a4
 ; RV32IZHINXMIN-NEXT: ret
 ;
 ; RV64IZHINXMIN-LABEL: test_floor_si64:
@@ -413,7 +421,7 @@ define signext i32 @test_floor_ui32(half %x) {
 ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
 ; RV64IZHINX-NEXT: feq.h a0, a0, a0
 ; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addi a0, a0, -1
+; RV64IZHINX-NEXT: addiw a0, a0, -1
 ; RV64IZHINX-NEXT: and a0, a1, a0
 ; RV64IZHINX-NEXT: ret
 ;
@@ -457,7 +465,7 @@ define signext i32 @test_floor_ui32(half %x) {
 ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
 ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5
 ; RV64IZFHMIN-NEXT: seqz a1, a1
-; RV64IZFHMIN-NEXT: addi a1, a1, -1
+; RV64IZFHMIN-NEXT: addiw a1, a1, -1
 ; RV64IZFHMIN-NEXT: and a0, a0, a1
 ; RV64IZFHMIN-NEXT: ret
 ;
@@ -499,7 +507,7 @@ define signext i32 @test_floor_ui32(half %x) {
 ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
 ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0
 ; RV64IZHINXMIN-NEXT: seqz a0, a0
-; RV64IZHINXMIN-NEXT: addi a0, a0, -1
+; RV64IZHINXMIN-NEXT: addiw a0, a0, -1
 ; RV64IZHINXMIN-NEXT: and a0, a1, a0
 ; RV64IZHINXMIN-NEXT: ret
 %a = call half @llvm.floor.f16(half %x)
@@ -522,25 +530,24 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZFH-NEXT: .LBB3_2:
 ; RV32IZFH-NEXT: addi sp, sp, -16
 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
-; RV32IZFH-NEXT: fmv.w.x fa5, zero
-; RV32IZFH-NEXT: fle.s a0, fa5, fs0
-; RV32IZFH-NEXT: neg s0, a0
 ; RV32IZFH-NEXT: fmv.s fa0, fs0
 ; RV32IZFH-NEXT: call __fixunssfdi
-; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a2)
-; RV32IZFH-NEXT: and a0, s0, a0
-; RV32IZFH-NEXT: flt.s a2, fa5, fs0
-; RV32IZFH-NEXT: neg a2, a2
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a2, a1
+; RV32IZFH-NEXT: fmv.w.x fa5, zero
+; RV32IZFH-NEXT: fle.s a2, fa5, fs0
+; RV32IZFH-NEXT: lui a3, %hi(.LCPI3_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a3)
+; RV32IZFH-NEXT: xori a2, a2, 1
+; RV32IZFH-NEXT: addi a2, a2, -1
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a3, a3
+; RV32IZFH-NEXT: or a0, a3, a0
+; RV32IZFH-NEXT: and a1, a2, a1
+; RV32IZFH-NEXT: or a1, a3, a1
 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT: addi sp, sp, 16
 ; RV32IZFH-NEXT: ret
 ;
@@ -568,23 +575,22 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZHINX-NEXT: addi sp, sp, -16
 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT: fcvt.s.h s0, a0
-; RV32IZHINX-NEXT: fle.s a0, zero, s0
-; RV32IZHINX-NEXT: neg s1, a0
 ; RV32IZHINX-NEXT: mv a0, s0
 ; RV32IZHINX-NEXT: call __fixunssfdi
-; RV32IZHINX-NEXT: lui a2, %hi(.LCPI3_1)
-; RV32IZHINX-NEXT: lw a2, %lo(.LCPI3_1)(a2)
-; RV32IZHINX-NEXT: and a0, s1, a0
-; RV32IZHINX-NEXT: flt.s a2, a2, s0
-; RV32IZHINX-NEXT: neg a2, a2
-; RV32IZHINX-NEXT: or a0, a2, a0
-; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a1, a2, a1
+; RV32IZHINX-NEXT: fle.s a2, zero, s0
+; RV32IZHINX-NEXT: lui a3, %hi(.LCPI3_1)
+; RV32IZHINX-NEXT: lw a3, %lo(.LCPI3_1)(a3)
+; RV32IZHINX-NEXT: xori a2, a2, 1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: and a0, a2, a0
+; RV32IZHINX-NEXT: flt.s a3, a3, s0
+; RV32IZHINX-NEXT: neg a3, a3
+; RV32IZHINX-NEXT: or a0, a3, a0
+; RV32IZHINX-NEXT: and a1, a2, a1
+; RV32IZHINX-NEXT: or a1, a3, a1
 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT: addi sp, sp, 16
 ; RV32IZHINX-NEXT: ret
 ;
@@ -622,26 +628,25 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZFHMIN-NEXT: .LBB3_2:
 ; RV32IZFHMIN-NEXT: addi sp, sp, -16
 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
-; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
-; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0
-; RV32IZFHMIN-NEXT: neg s0, a0
 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
 ; RV32IZFHMIN-NEXT: call __fixunssfdi
-; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI3_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a2)
-; RV32IZFHMIN-NEXT: and a0, s0, a0
-; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT: neg a2, a2
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a1, s0, a1
-; RV32IZFHMIN-NEXT: or a1, a2, a1
+; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0
+; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI3_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a3)
+; RV32IZFHMIN-NEXT: xori a2, a2, 1
+; RV32IZFHMIN-NEXT: addi a2, a2, -1
+; RV32IZFHMIN-NEXT: and a0, a2, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a3, a3
+; RV32IZFHMIN-NEXT: or a0, a3, a0
+; RV32IZFHMIN-NEXT: and a1, a2, a1
+; RV32IZFHMIN-NEXT: or a1, a3, a1
 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFHMIN-NEXT: addi sp, sp, 16
 ; RV32IZFHMIN-NEXT: ret
 ;
@@ -682,24 +687,23 @@ define i64 @test_floor_ui64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT: addi sp, sp, -16
 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0
-; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0
-; RV32IZHINXMIN-NEXT: neg s1, a0
 ; RV32IZHINXMIN-NEXT: mv a0, s0
 ; RV32IZHINXMIN-NEXT: call __fixunssfdi
-; RV32IZHINXMIN-NEXT: lui a2, 
%hi(.LCPI3_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI3_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI3_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI3_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -820,38 +824,41 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB5_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB5_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB5_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB5_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB5_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB5_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB5_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_ceil_si64: @@ -889,16 +896,17 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI5_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI5_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s 
a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB5_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB5_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB5_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -906,11 +914,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB5_6 +; RV32IZHINX-NEXT: beqz a3, .LBB5_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB5_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_ceil_si64: @@ -948,39 +956,42 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI5_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB5_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB5_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB5_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI5_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB5_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB5_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_ceil_si64: @@ -1032,16 +1043,17 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI5_0) ; 
RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB5_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB5_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB5_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1049,11 +1061,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB5_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB5_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB5_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_ceil_si64: @@ -1125,7 +1137,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -1169,7 +1181,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -1211,7 +1223,7 @@ define signext i32 @test_ceil_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.ceil.f16(half %x) @@ -1234,25 +1246,24 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB7_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI7_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, 
a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -1280,23 +1291,22 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI7_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI7_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI7_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI7_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -1334,26 +1344,25 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB7_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -1394,24 +1403,23 @@ define i64 @test_ceil_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI7_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI7_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI7_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -1532,38 +1540,41 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI9_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB9_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB9_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB9_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB9_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB9_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB9_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB9_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_trunc_si64: @@ 
-1601,16 +1612,17 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI9_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI9_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB9_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB9_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB9_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1618,11 +1630,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB9_6 +; RV32IZHINX-NEXT: beqz a3, .LBB9_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB9_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_trunc_si64: @@ -1660,39 +1672,42 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB9_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB9_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB9_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB9_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; 
RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB9_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_trunc_si64: @@ -1744,16 +1759,17 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB9_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB9_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB9_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1761,11 +1777,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB9_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB9_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB9_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_trunc_si64: @@ -1837,7 +1853,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -1881,7 +1897,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -1923,7 +1939,7 @@ define signext i32 @test_trunc_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.trunc.f16(half %x) @@ -1946,25 +1962,24 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB11_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: 
and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI11_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -1992,23 +2007,22 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI11_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI11_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI11_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI11_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -2046,26 +2060,25 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB11_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -2106,24 +2119,23 @@ define i64 @test_trunc_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI11_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI11_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI11_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -2244,38 +2256,41 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI13_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB13_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB13_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB13_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB13_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB13_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte 
Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB13_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB13_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_round_si64: @@ -2313,16 +2328,17 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI13_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI13_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB13_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB13_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB13_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2330,11 +2346,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB13_6 +; RV32IZHINX-NEXT: beqz a3, .LBB13_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB13_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_round_si64: @@ -2372,39 +2388,42 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI13_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB13_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB13_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB13_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI13_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB13_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; 
RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB13_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_round_si64: @@ -2456,16 +2475,17 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB13_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB13_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB13_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2473,11 +2493,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB13_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB13_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB13_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_round_si64: @@ -2549,7 +2569,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -2593,7 +2613,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -2635,7 +2655,7 @@ define signext i32 @test_round_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.round.f16(half %x) @@ -2658,25 +2678,24 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB15_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 
8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI15_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -2704,23 +2723,22 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI15_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI15_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI15_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI15_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -2758,26 +2776,25 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB15_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, 
zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -2818,24 +2835,23 @@ define i64 @test_round_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI15_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI15_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI15_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -2956,38 +2972,41 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI17_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB17_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB17_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB17_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; 
RV32IZFH-NEXT: beqz a3, .LBB17_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB17_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB17_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB17_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_roundeven_si64: @@ -3025,16 +3044,17 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI17_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI17_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB17_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB17_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB17_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3042,11 +3062,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB17_6 +; RV32IZHINX-NEXT: beqz a3, .LBB17_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB17_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_roundeven_si64: @@ -3084,39 +3104,42 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI17_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB17_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB17_4 ; RV32IZFHMIN-NEXT: # %bb.3: 
-; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB17_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI17_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB17_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB17_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_roundeven_si64: @@ -3168,16 +3191,17 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI17_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB17_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB17_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB17_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3185,11 +3209,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB17_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB17_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB17_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_roundeven_si64: @@ -3261,7 +3285,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -3305,7 +3329,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -3347,7 +3371,7 @@ define signext i32 @test_roundeven_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; 
RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.roundeven.f16(half %x) @@ -3370,25 +3394,24 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB19_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI19_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -3416,23 +3439,22 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI19_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI19_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI19_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI19_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret ; @@ -3470,26 +3492,25 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB19_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; 
RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -3530,24 +3551,23 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI19_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI19_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI19_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; @@ -3668,38 +3688,41 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi +; RV32IZFH-NEXT: lui a2, %hi(.LCPI21_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a2) +; RV32IZFH-NEXT: and a0, s1, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a3 +; 
RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: lui a5, 524288 +; RV32IZFH-NEXT: li a6, 1 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a2, 524288 -; RV32IZFH-NEXT: beqz s0, .LBB21_4 +; RV32IZFH-NEXT: bne s0, a6, .LBB21_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: mv a4, a1 ; RV32IZFH-NEXT: .LBB21_4: -; RV32IZFH-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a1) -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: beqz a3, .LBB21_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a2, a4, -1 -; RV32IZFH-NEXT: .LBB21_6: -; RV32IZFH-NEXT: feq.s a1, fs0, fs0 -; RV32IZFH-NEXT: neg a4, a1 -; RV32IZFH-NEXT: and a1, a4, a2 -; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: beqz a3, .LBB21_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a4, a5, -1 +; RV32IZFH-NEXT: .LBB21_6: +; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_rint_si64: @@ -3737,16 +3760,17 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI21_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a4, a2, s0 -; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: flt.s a3, a2, s0 +; RV32IZHINX-NEXT: neg a2, a3 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a3, 524288 -; RV32IZHINX-NEXT: beqz s1, .LBB21_4 +; RV32IZHINX-NEXT: li a6, 1 +; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: bne s1, a6, .LBB21_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: mv a4, a1 ; RV32IZHINX-NEXT: .LBB21_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3754,11 +3778,11 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a4, .LBB21_6 +; RV32IZHINX-NEXT: beqz a3, .LBB21_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: addi a4, a5, -1 ; RV32IZHINX-NEXT: .LBB21_6: -; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: and a1, a2, a4 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_rint_si64: @@ -3796,39 +3820,42 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI21_0) +; 
RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s1, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: lui a5, 524288 +; RV32IZFHMIN-NEXT: li a6, 1 ; RV32IZFHMIN-NEXT: lui a4, 524288 -; RV32IZFHMIN-NEXT: lui a2, 524288 -; RV32IZFHMIN-NEXT: beqz s0, .LBB21_4 +; RV32IZFHMIN-NEXT: bne s0, a6, .LBB21_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: mv a4, a1 ; RV32IZFHMIN-NEXT: .LBB21_4: -; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI21_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a1) -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a2, a4, -1 -; RV32IZFHMIN-NEXT: .LBB21_6: -; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a4, a1 -; RV32IZFHMIN-NEXT: and a1, a4, a2 -; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a4, a5, -1 +; RV32IZFHMIN-NEXT: .LBB21_6: +; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_rint_si64: @@ -3880,16 +3907,17 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI21_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a3 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a3, 524288 -; RV32IZHINXMIN-NEXT: beqz s1, .LBB21_4 +; RV32IZHINXMIN-NEXT: li a6, 1 +; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB21_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: mv a4, a1 ; RV32IZHINXMIN-NEXT: .LBB21_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3897,11 +3925,11 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a4, .LBB21_6 +; RV32IZHINXMIN-NEXT: beqz a3, .LBB21_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: addi a4, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB21_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: and a1, a2, a4 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_rint_si64: @@ -3973,7 +4001,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz ; RV64IZHINX-NEXT: feq.h a0, a0, a0 ; RV64IZHINX-NEXT: seqz a0, a0 -; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: addiw a0, a0, -1 ; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; @@ -4017,7 +4045,7 @@ define 
signext i32 @test_rint_ui32(half %x) { ; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 ; RV64IZFHMIN-NEXT: seqz a1, a1 -; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: addiw a1, a1, -1 ; RV64IZFHMIN-NEXT: and a0, a0, a1 ; RV64IZFHMIN-NEXT: ret ; @@ -4059,7 +4087,7 @@ define signext i32 @test_rint_ui32(half %x) { ; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz ; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 ; RV64IZHINXMIN-NEXT: seqz a0, a0 -; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: addiw a0, a0, -1 ; RV64IZHINXMIN-NEXT: and a0, a1, a0 ; RV64IZHINXMIN-NEXT: ret %a = call half @llvm.rint.f16(half %x) @@ -4082,25 +4110,24 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFH-NEXT: .LBB23_2: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa5, zero -; RV32IZFH-NEXT: fle.s a0, fa5, fs0 -; RV32IZFH-NEXT: neg s0, a0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixunssfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI23_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a2) -; RV32IZFH-NEXT: and a0, s0, a0 -; RV32IZFH-NEXT: flt.s a2, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: and a1, s0, a1 -; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a2, fa5, fs0 +; RV32IZFH-NEXT: lui a3, %hi(.LCPI23_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a3) +; RV32IZFH-NEXT: xori a2, a2, 1 +; RV32IZFH-NEXT: addi a2, a2, -1 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: neg a3, a3 +; RV32IZFH-NEXT: or a0, a3, a0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: or a1, a3, a1 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; @@ -4128,23 +4155,22 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -16 ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: fcvt.s.h s0, a0 -; RV32IZHINX-NEXT: fle.s a0, zero, s0 -; RV32IZHINX-NEXT: neg s1, a0 ; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi -; RV32IZHINX-NEXT: lui a2, %hi(.LCPI23_1) -; RV32IZHINX-NEXT: lw a2, %lo(.LCPI23_1)(a2) -; RV32IZHINX-NEXT: and a0, s1, a0 -; RV32IZHINX-NEXT: flt.s a2, a2, s0 -; RV32IZHINX-NEXT: neg a2, a2 -; RV32IZHINX-NEXT: or a0, a2, a0 -; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: fle.s a2, zero, s0 +; RV32IZHINX-NEXT: lui a3, %hi(.LCPI23_1) +; RV32IZHINX-NEXT: lw a3, %lo(.LCPI23_1)(a3) +; RV32IZHINX-NEXT: xori a2, a2, 1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: flt.s a3, a3, s0 +; RV32IZHINX-NEXT: neg a3, a3 +; RV32IZHINX-NEXT: or a0, a3, a0 +; RV32IZHINX-NEXT: and a1, a2, a1 +; RV32IZHINX-NEXT: or a1, a3, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: 
ret ; @@ -4182,26 +4208,25 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZFHMIN-NEXT: .LBB23_2: ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 -; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero -; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s0, a0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixunssfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s0, a0 -; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: and a1, s0, a1 -; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a3) +; RV32IZFHMIN-NEXT: xori a2, a2, 1 +; RV32IZFHMIN-NEXT: addi a2, a2, -1 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a3, a3 +; RV32IZFHMIN-NEXT: or a0, a3, a0 +; RV32IZFHMIN-NEXT: and a1, a2, a1 +; RV32IZFHMIN-NEXT: or a1, a3, a1 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret ; @@ -4242,24 +4267,23 @@ define i64 @test_rint_ui64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: addi sp, sp, -16 ; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 -; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 -; RV32IZHINXMIN-NEXT: neg s1, a0 ; RV32IZHINXMIN-NEXT: mv a0, s0 ; RV32IZHINXMIN-NEXT: call __fixunssfdi -; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32IZHINXMIN-NEXT: and a0, s1, a0 -; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a2 -; RV32IZHINXMIN-NEXT: or a0, a2, a0 -; RV32IZHINXMIN-NEXT: and a1, s1, a1 -; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0 +; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI23_0) +; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI23_0)(a3) +; RV32IZHINXMIN-NEXT: xori a2, a2, 1 +; RV32IZHINXMIN-NEXT: addi a2, a2, -1 +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0 +; RV32IZHINXMIN-NEXT: neg a3, a3 +; RV32IZHINXMIN-NEXT: or a0, a3, a0 +; RV32IZHINXMIN-NEXT: and a1, a2, a1 +; RV32IZHINXMIN-NEXT: or a1, a3, a1 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll index cb64e24128b5e3..98c886333d69a0 100644 --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -302,56 +302,56 @@ define i128 @abs128(i128 %x) { ; RV32I-LABEL: abs128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; 
RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: bgez a2, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: or a6, a4, a3 -; RV32I-NEXT: snez a6, a6 -; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, a7 +; RV32I-NEXT: sub a2, a1, t0 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: abs128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a2, 12(a1) -; RV32ZBB-NEXT: lw a3, 4(a1) -; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) ; RV32ZBB-NEXT: bgez a2, .LBB8_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: or a6, a4, a3 -; RV32ZBB-NEXT: snez a6, a6 -; RV32ZBB-NEXT: sltu a7, a5, a6 +; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a6, a7, a6 +; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, a7 +; RV32ZBB-NEXT: sub a2, a1, t0 ; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB8_2: -; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -384,56 +384,56 @@ define i128 @select_abs128(i128 %x) { ; RV32I-LABEL: select_abs128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: bgez a2, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: or a6, a4, a3 -; RV32I-NEXT: snez a6, a6 -; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, a7 +; RV32I-NEXT: sub a2, a1, t0 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: snez a5, a4 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: select_abs128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a2, 12(a1) -; RV32ZBB-NEXT: lw a3, 4(a1) -; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) ; RV32ZBB-NEXT: bgez a2, .LBB9_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: or a6, a4, a3 -; RV32ZBB-NEXT: snez a6, a6 -; RV32ZBB-NEXT: sltu a7, a5, a6 +; 
RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a6, a7, a6 +; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, a7 +; RV32ZBB-NEXT: sub a2, a1, t0 ; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: snez a5, a4 -; RV32ZBB-NEXT: neg a3, a3 -; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB9_2: -; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/pr84200.ll b/llvm/test/CodeGen/RISCV/pr84200.ll new file mode 100644 index 00000000000000..19a102b84ed062 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr84200.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +; The sub nuw produces poison if the input is not 0 or 1. We must insert a +; freeze before converting the sub to AND so that we don't propagate poison. +define i64 @foo(i64 %1) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: sub a1, a1, a0 +; CHECK-NEXT: sltiu a0, a0, 2 +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: ret +entry: + %.urem.i = sub nuw i64 1, %1 + %.cmp.i = icmp ugt i64 %1, 1 + %2 = xor i64 %.urem.i, 1 + %3 = select i1 %.cmp.i, i64 0, i64 %2 + ret i64 %3 +} diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll index 71040bf2646d2c..4e958f5699adbf 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll @@ -298,14 +298,14 @@ define i32 @not_shl_one_i32(i32 %x) { define i64 @not_shl_one_i64(i64 %x) { ; CHECK-LABEL: not_shl_one_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: sll a1, a1, a0 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: slti a0, a0, 0 -; CHECK-NEXT: neg a2, a0 -; CHECK-NEXT: and a2, a2, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a1, a0, a1 +; CHECK-NEXT: addi a1, a0, -32 +; CHECK-NEXT: slti a1, a1, 0 +; CHECK-NEXT: neg a2, a1 +; CHECK-NEXT: li a3, 1 +; CHECK-NEXT: sll a0, a3, a0 +; CHECK-NEXT: and a2, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a0 ; CHECK-NEXT: not a0, a2 ; CHECK-NEXT: not a1, a1 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll index ccda8f4e5dd059..30aba61ba47469 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll @@ -48,20 +48,20 @@ define i32 @bclr_i32_no_mask(i32 %a, i32 %b) nounwind { define i64 @bclr_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: bclr_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a3, 1 -; RV32I-NEXT: sll a4, a3, a2 -; RV32I-NEXT: andi a2, a2, 63 -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: slti a5, a5, 0 -; RV32I-NEXT: neg a6, a5 -; RV32I-NEXT: and a4, a6, a4 -; RV32I-NEXT: sll a2, a3, a2 -; RV32I-NEXT: addi a5, a5, -1 +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi a4, a3, -32 +; RV32I-NEXT: slti a4, a4, 0 +; RV32I-NEXT: neg a5, a4 +; RV32I-NEXT: li a6, 1 +; RV32I-NEXT: sll a2, a6, a2 ; RV32I-NEXT: and a2, a5, a2 -; RV32I-NEXT: not a3, a4 +; RV32I-NEXT: sll a3, a6, a3 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: and a3, a4, a3 ; RV32I-NEXT: not a2, a2 -; 
RV32I-NEXT: and a0, a3, a0 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: and a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBSNOZBB-LABEL: bclr_i64: @@ -186,14 +186,14 @@ define i64 @bset_i64(i64 %a, i64 %b) nounwind { define signext i64 @bset_i64_zero(i64 signext %a) nounwind { ; RV32I-LABEL: bset_i64_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: sll a1, a1, a0 -; RV32I-NEXT: addi a0, a0, -32 -; RV32I-NEXT: slti a2, a0, 0 -; RV32I-NEXT: neg a0, a2 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: addi a1, a0, -32 +; RV32I-NEXT: slti a1, a1, 0 +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: li a3, 1 +; RV32I-NEXT: sll a3, a3, a0 +; RV32I-NEXT: and a0, a2, a3 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBS-LABEL: bset_i64_zero: diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll index 4ec7f2660b2a35..73bfc6480b4d75 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll @@ -489,7 +489,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -513,7 +513,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64XTHEADBB-NEXT: add a0, a1, a0 ; RV64XTHEADBB-NEXT: lbu a0, 0(a0) ; RV64XTHEADBB-NEXT: snez a1, s0 -; RV64XTHEADBB-NEXT: addi a1, a1, -1 +; RV64XTHEADBB-NEXT: addiw a1, a1, -1 ; RV64XTHEADBB-NEXT: or a0, a1, a0 ; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -542,12 +542,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: addiw a0, a0, 1 ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -569,12 +567,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64XTHEADBB-NEXT: add a0, a1, a0 ; RV64XTHEADBB-NEXT: lbu a0, 0(a0) -; RV64XTHEADBB-NEXT: addi a0, a0, 1 +; RV64XTHEADBB-NEXT: addiw a0, a0, 1 ; RV64XTHEADBB-NEXT: seqz a1, s0 -; RV64XTHEADBB-NEXT: addi a1, a1, -1 +; RV64XTHEADBB-NEXT: addiw a1, a1, -1 ; RV64XTHEADBB-NEXT: and a0, a1, a0 -; RV64XTHEADBB-NEXT: slli a0, a0, 32 -; RV64XTHEADBB-NEXT: srli a0, a0, 32 ; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64XTHEADBB-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll index 68ce66cbe8537d..7feef4dad4116a 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll @@ -444,7 +444,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 
0(a0) ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -481,12 +481,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: addiw a0, a0, 1 ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -495,11 +493,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64ZBB-LABEL: ffs_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctzw a1, a0 -; RV64ZBB-NEXT: addi a1, a1, 1 +; RV64ZBB-NEXT: addiw a1, a1, 1 ; RV64ZBB-NEXT: seqz a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: addiw a0, a0, -1 ; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) %2 = add i32 %1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 48ce7d623475cb..652a1799ae55c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -89,17 +89,17 @@ entry: define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz -; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2 +; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB2_2: # %entry -; CHECK-NOV-NEXT: blt a1, a2, .LBB2_4 +; CHECK-NOV-NEXT: blt a0, a2, .LBB2_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB2_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -257,46 +257,46 @@ entry: define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz ; CHECK-NOV-NEXT: li a4, -1 ; CHECK-NOV-NEXT: srli a4, a4, 32 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa1, rtz +; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7 ; CHECK-NOV-NEXT: .LBB5_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8 ; CHECK-NOV-NEXT: .LBB5_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5 ; CHECK-NOV-NEXT: .LBB5_4: # %entry ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB5_5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a5 -; CHECK-NOV-NEXT: sgtz a6, a3 -; CHECK-NOV-NEXT: sgtz a7, a2 -; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: sgtz a6, a2 +; CHECK-NOV-NEXT: sgtz a7, a3 +; CHECK-NOV-NEXT: sgtz t0, a5 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a1, t0, a1 +; 
CHECK-NOV-NEXT: and a5, t0, a5 ; CHECK-NOV-NEXT: negw a7, a7 -; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a7, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sw a4, 12(a0) -; CHECK-NOV-NEXT: sw a3, 8(a0) -; CHECK-NOV-NEXT: sw a2, 4(a0) -; CHECK-NOV-NEXT: sw a1, 0(a0) +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sw a1, 12(a0) +; CHECK-NOV-NEXT: sw a2, 8(a0) +; CHECK-NOV-NEXT: sw a3, 4(a0) +; CHECK-NOV-NEXT: sw a5, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB5_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2 ; CHECK-NOV-NEXT: .LBB5_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3 ; CHECK-NOV-NEXT: .LBB5_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -700,10 +700,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs0, -48 ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 24(a1) -; CHECK-NOV-NEXT: lhu s3, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 24(a1) +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu s3, 8(a1) +; CHECK-NOV-NEXT: lhu a1, 16(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 @@ -732,22 +732,22 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .LBB8_4: # %entry ; CHECK-NOV-NEXT: mv a3, a2 ; CHECK-NOV-NEXT: .LBB8_5: # %entry -; CHECK-NOV-NEXT: sgtz a2, a3 -; CHECK-NOV-NEXT: sgtz a4, a1 -; CHECK-NOV-NEXT: sgtz a5, s1 -; CHECK-NOV-NEXT: sgtz a6, a0 +; CHECK-NOV-NEXT: sgtz a2, a0 +; CHECK-NOV-NEXT: sgtz a4, s1 +; CHECK-NOV-NEXT: sgtz a5, a1 +; CHECK-NOV-NEXT: sgtz a6, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a0, a6, a0 +; CHECK-NOV-NEXT: and a3, a6, a3 ; CHECK-NOV-NEXT: negw a5, a5 -; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: and a1, a5, a1 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: and a4, a4, s1 ; CHECK-NOV-NEXT: negw a2, a2 -; CHECK-NOV-NEXT: and a2, a2, a3 -; CHECK-NOV-NEXT: sw a2, 12(s0) -; CHECK-NOV-NEXT: sw a1, 8(s0) -; CHECK-NOV-NEXT: sw a5, 4(s0) -; CHECK-NOV-NEXT: sw a0, 0(s0) +; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: sw a0, 12(s0) +; CHECK-NOV-NEXT: sw a4, 8(s0) +; CHECK-NOV-NEXT: sw a1, 4(s0) +; CHECK-NOV-NEXT: sw a3, 0(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -950,17 +950,17 @@ entry: define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz -; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2 +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz +; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB11_2: # %entry -; CHECK-NOV-NEXT: blt a1, a2, .LBB11_4 +; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a1, a2 
+; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB11_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -1122,46 +1122,46 @@ entry: define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz ; CHECK-NOV-NEXT: lui a4, 16 ; CHECK-NOV-NEXT: addiw a4, a4, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa1, rtz +; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz ; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7 ; CHECK-NOV-NEXT: .LBB14_2: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8 ; CHECK-NOV-NEXT: .LBB14_3: # %entry ; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5 ; CHECK-NOV-NEXT: .LBB14_4: # %entry ; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB14_5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a5 -; CHECK-NOV-NEXT: sgtz a6, a3 -; CHECK-NOV-NEXT: sgtz a7, a2 -; CHECK-NOV-NEXT: sgtz t0, a1 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: sgtz a6, a2 +; CHECK-NOV-NEXT: sgtz a7, a3 +; CHECK-NOV-NEXT: sgtz t0, a5 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: and a5, t0, a5 ; CHECK-NOV-NEXT: negw a7, a7 -; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a7, a3 ; CHECK-NOV-NEXT: negw a6, a6 -; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a2, a6, a2 ; CHECK-NOV-NEXT: negw a4, a4 -; CHECK-NOV-NEXT: and a4, a4, a5 -; CHECK-NOV-NEXT: sh a4, 6(a0) -; CHECK-NOV-NEXT: sh a3, 4(a0) -; CHECK-NOV-NEXT: sh a2, 2(a0) -; CHECK-NOV-NEXT: sh a1, 0(a0) +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: sh a1, 6(a0) +; CHECK-NOV-NEXT: sh a2, 4(a0) +; CHECK-NOV-NEXT: sh a3, 2(a0) +; CHECK-NOV-NEXT: sh a5, 0(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB14_6: # %entry ; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz ; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2 ; CHECK-NOV-NEXT: .LBB14_7: # %entry ; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3 ; CHECK-NOV-NEXT: .LBB14_8: # %entry ; CHECK-NOV-NEXT: mv a3, a4 @@ -1822,14 +1822,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs4, -112 ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 56(a1) -; CHECK-NOV-NEXT: lhu s3, 48(a1) -; CHECK-NOV-NEXT: lhu s4, 40(a1) -; CHECK-NOV-NEXT: lhu s5, 32(a1) -; CHECK-NOV-NEXT: lhu s6, 24(a1) -; CHECK-NOV-NEXT: lhu s7, 16(a1) -; CHECK-NOV-NEXT: lhu a1, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 56(a1) +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu s3, 8(a1) +; CHECK-NOV-NEXT: lhu s4, 16(a1) +; CHECK-NOV-NEXT: lhu s5, 24(a1) +; CHECK-NOV-NEXT: lhu s6, 32(a1) +; CHECK-NOV-NEXT: lhu s7, 40(a1) +; CHECK-NOV-NEXT: lhu a1, 48(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a1 ; CHECK-NOV-NEXT: call __extendhfsf2 @@ -1882,38 +1882,38 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .LBB17_8: # %entry ; CHECK-NOV-NEXT: mv a7, a3 ; CHECK-NOV-NEXT: .LBB17_9: # %entry -; CHECK-NOV-NEXT: sgtz a3, a7 -; CHECK-NOV-NEXT: sgtz t0, a6 -; CHECK-NOV-NEXT: sgtz t1, a5 -; CHECK-NOV-NEXT: sgtz t2, a4 -; CHECK-NOV-NEXT: sgtz t3, a2 -; 
CHECK-NOV-NEXT: sgtz t4, a1 -; CHECK-NOV-NEXT: sgtz t5, s1 -; CHECK-NOV-NEXT: sgtz t6, a0 +; CHECK-NOV-NEXT: sgtz a3, a0 +; CHECK-NOV-NEXT: sgtz t0, s1 +; CHECK-NOV-NEXT: sgtz t1, a1 +; CHECK-NOV-NEXT: sgtz t2, a2 +; CHECK-NOV-NEXT: sgtz t3, a4 +; CHECK-NOV-NEXT: sgtz t4, a5 +; CHECK-NOV-NEXT: sgtz t5, a6 +; CHECK-NOV-NEXT: sgtz t6, a7 ; CHECK-NOV-NEXT: negw t6, t6 -; CHECK-NOV-NEXT: and a0, t6, a0 +; CHECK-NOV-NEXT: and a7, t6, a7 ; CHECK-NOV-NEXT: negw t5, t5 -; CHECK-NOV-NEXT: and t5, t5, s1 +; CHECK-NOV-NEXT: and a6, t5, a6 ; CHECK-NOV-NEXT: negw t4, t4 -; CHECK-NOV-NEXT: and a1, t4, a1 +; CHECK-NOV-NEXT: and a5, t4, a5 ; CHECK-NOV-NEXT: negw t3, t3 -; CHECK-NOV-NEXT: and a2, t3, a2 +; CHECK-NOV-NEXT: and a4, t3, a4 ; CHECK-NOV-NEXT: negw t2, t2 -; CHECK-NOV-NEXT: and a4, t2, a4 +; CHECK-NOV-NEXT: and a2, t2, a2 ; CHECK-NOV-NEXT: negw t1, t1 -; CHECK-NOV-NEXT: and a5, t1, a5 +; CHECK-NOV-NEXT: and a1, t1, a1 ; CHECK-NOV-NEXT: negw t0, t0 -; CHECK-NOV-NEXT: and a6, t0, a6 +; CHECK-NOV-NEXT: and t0, t0, s1 ; CHECK-NOV-NEXT: negw a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a7 -; CHECK-NOV-NEXT: sh a3, 14(s0) -; CHECK-NOV-NEXT: sh a6, 12(s0) -; CHECK-NOV-NEXT: sh a5, 10(s0) -; CHECK-NOV-NEXT: sh a4, 8(s0) -; CHECK-NOV-NEXT: sh a2, 6(s0) -; CHECK-NOV-NEXT: sh a1, 4(s0) -; CHECK-NOV-NEXT: sh t5, 2(s0) -; CHECK-NOV-NEXT: sh a0, 0(s0) +; CHECK-NOV-NEXT: and a0, a3, a0 +; CHECK-NOV-NEXT: sh a0, 14(s0) +; CHECK-NOV-NEXT: sh t0, 12(s0) +; CHECK-NOV-NEXT: sh a1, 10(s0) +; CHECK-NOV-NEXT: sh a2, 8(s0) +; CHECK-NOV-NEXT: sh a4, 6(s0) +; CHECK-NOV-NEXT: sh a5, 4(s0) +; CHECK-NOV-NEXT: sh a6, 2(s0) +; CHECK-NOV-NEXT: sh a7, 0(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -2106,66 +2106,65 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa0 +; CHECK-NOV-NEXT: fmv.d fa0, fa1 ; CHECK-NOV-NEXT: call __fixdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti -; CHECK-NOV-NEXT: mv a2, a0 -; CHECK-NOV-NEXT: li a0, -1 -; CHECK-NOV-NEXT: srli a3, a0, 1 -; CHECK-NOV-NEXT: beqz a1, .LBB18_3 +; CHECK-NOV-NEXT: li a2, -1 +; CHECK-NOV-NEXT: srli a3, a2, 1 +; CHECK-NOV-NEXT: beqz s1, .LBB18_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, a1, 0 -; CHECK-NOV-NEXT: bnez s1, .LBB18_4 +; CHECK-NOV-NEXT: slti a4, s1, 0 +; CHECK-NOV-NEXT: bnez a1, .LBB18_4 ; CHECK-NOV-NEXT: .LBB18_2: -; CHECK-NOV-NEXT: sltu a5, s0, a3 +; CHECK-NOV-NEXT: sltu a5, a0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB18_5 ; CHECK-NOV-NEXT: j .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_3: -; CHECK-NOV-NEXT: sltu a4, a2, a3 -; CHECK-NOV-NEXT: beqz s1, .LBB18_2 +; CHECK-NOV-NEXT: sltu a4, s0, a3 +; CHECK-NOV-NEXT: beqz a1, .LBB18_2 ; CHECK-NOV-NEXT: .LBB18_4: # %entry -; CHECK-NOV-NEXT: slti a5, s1, 0 +; CHECK-NOV-NEXT: slti a5, a1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_5: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: .LBB18_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, a1 +; CHECK-NOV-NEXT: and a5, a5, s1 ; CHECK-NOV-NEXT: bnez a4, .LBB18_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB18_8: # 
%entry -; CHECK-NOV-NEXT: and a4, a6, s1 -; CHECK-NOV-NEXT: slli a1, a0, 63 -; CHECK-NOV-NEXT: beq a5, a0, .LBB18_11 +; CHECK-NOV-NEXT: and a4, a6, a1 +; CHECK-NOV-NEXT: slli a1, a2, 63 +; CHECK-NOV-NEXT: beq a5, a2, .LBB18_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a0, .LBB18_12 +; CHECK-NOV-NEXT: bne a4, a2, .LBB18_12 ; CHECK-NOV-NEXT: .LBB18_10: -; CHECK-NOV-NEXT: sltu a0, a1, s0 -; CHECK-NOV-NEXT: beqz a0, .LBB18_13 +; CHECK-NOV-NEXT: sltu a2, a1, a0 +; CHECK-NOV-NEXT: beqz a2, .LBB18_13 ; CHECK-NOV-NEXT: j .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_11: -; CHECK-NOV-NEXT: sltu a3, a1, a2 -; CHECK-NOV-NEXT: beq a4, a0, .LBB18_10 +; CHECK-NOV-NEXT: sltu a3, a1, s0 +; CHECK-NOV-NEXT: beq a4, a2, .LBB18_10 ; CHECK-NOV-NEXT: .LBB18_12: # %entry -; CHECK-NOV-NEXT: slti a0, a4, 0 -; CHECK-NOV-NEXT: xori a0, a0, 1 -; CHECK-NOV-NEXT: bnez a0, .LBB18_14 +; CHECK-NOV-NEXT: slti a2, a4, 0 +; CHECK-NOV-NEXT: xori a2, a2, 1 +; CHECK-NOV-NEXT: bnez a2, .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_13: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a0, a1 ; CHECK-NOV-NEXT: .LBB18_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB18_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv a2, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB18_16: # %entry -; CHECK-NOV-NEXT: mv a0, s0 -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2190,43 +2189,43 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz a1, .LBB18_3 +; CHECK-V-NEXT: beqz s1, .LBB18_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: bnez s1, .LBB18_4 +; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: bnez a1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: -; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: sltu a5, a0, a3 ; CHECK-V-NEXT: beqz a5, .LBB18_5 ; CHECK-V-NEXT: j .LBB18_6 ; CHECK-V-NEXT: .LBB18_3: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: beqz s1, .LBB18_2 +; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: beqz a1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry -; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: slti a5, a1, 0 ; CHECK-V-NEXT: bnez a5, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB18_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: and a5, a5, s1 ; CHECK-V-NEXT: bnez a4, .LBB18_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB18_8: # %entry -; CHECK-V-NEXT: and a4, a6, s1 +; CHECK-V-NEXT: and a4, a6, a1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB18_11 ; CHECK-V-NEXT: # 
%bb.9: # %entry @@ -2234,26 +2233,26 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB18_12 ; CHECK-V-NEXT: .LBB18_10: -; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: sltu a2, a1, a0 ; CHECK-V-NEXT: beqz a2, .LBB18_13 ; CHECK-V-NEXT: j .LBB18_14 ; CHECK-V-NEXT: .LBB18_11: -; CHECK-V-NEXT: sltu a3, a1, a0 +; CHECK-V-NEXT: sltu a3, a1, s0 ; CHECK-V-NEXT: beq a4, a2, .LBB18_10 ; CHECK-V-NEXT: .LBB18_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB18_14 ; CHECK-V-NEXT: .LBB18_13: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB18_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB18_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB18_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, s0 +; CHECK-V-NEXT: vmv.s.x v8, s0 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2286,19 +2285,19 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa0 +; CHECK-NOV-NEXT: fmv.d fa0, fa1 ; CHECK-NOV-NEXT: call __fixunsdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti -; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: snez a2, s1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a2, a2, s0 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a1, a1, a0 -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: and a0, a1, a0 +; CHECK-NOV-NEXT: addi a1, a2, -1 +; CHECK-NOV-NEXT: and a1, a1, s0 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2323,25 +2322,25 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti -; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: addi a2, a2, -1 +; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: vmv.s.x v9, a2 +; CHECK-V-NEXT: vmv.s.x v8, a2 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2383,32 +2382,32 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB20_2: # %entry -; CHECK-NOV-NEXT: slti a3, a1, 1 ; 
CHECK-NOV-NEXT: slti a4, s1, 1
+; CHECK-NOV-NEXT: slti a3, a1, 1
 ; CHECK-NOV-NEXT: blez a1, .LBB20_4
 ; CHECK-NOV-NEXT: # %bb.3: # %entry
 ; CHECK-NOV-NEXT: li a1, 1
 ; CHECK-NOV-NEXT: .LBB20_4: # %entry
-; CHECK-NOV-NEXT: neg a4, a4
 ; CHECK-NOV-NEXT: neg a3, a3
 ; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: neg a0, a4
 ; CHECK-NOV-NEXT: beqz a1, .LBB20_7
 ; CHECK-NOV-NEXT: # %bb.5: # %entry
 ; CHECK-NOV-NEXT: sgtz a1, a1
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
 ; CHECK-NOV-NEXT: bnez a2, .LBB20_8
 ; CHECK-NOV-NEXT: .LBB20_6:
-; CHECK-NOV-NEXT: snez a0, a4
+; CHECK-NOV-NEXT: snez a2, a0
 ; CHECK-NOV-NEXT: j .LBB20_9
 ; CHECK-NOV-NEXT: .LBB20_7:
 ; CHECK-NOV-NEXT: snez a1, a3
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
 ; CHECK-NOV-NEXT: beqz a2, .LBB20_6
 ; CHECK-NOV-NEXT: .LBB20_8: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a2
+; CHECK-NOV-NEXT: sgtz a2, a2
 ; CHECK-NOV-NEXT: .LBB20_9: # %entry
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, a4
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
 ; CHECK-NOV-NEXT: neg a1, a1
 ; CHECK-NOV-NEXT: and a1, a1, a3
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -2450,15 +2449,15 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-V-NEXT: # %bb.1: # %entry
 ; CHECK-V-NEXT: li a2, 1
 ; CHECK-V-NEXT: .LBB20_2: # %entry
-; CHECK-V-NEXT: slti a4, a1, 1
 ; CHECK-V-NEXT: slti a3, s1, 1
+; CHECK-V-NEXT: slti a4, a1, 1
 ; CHECK-V-NEXT: blez a1, .LBB20_4
 ; CHECK-V-NEXT: # %bb.3: # %entry
 ; CHECK-V-NEXT: li a1, 1
 ; CHECK-V-NEXT: .LBB20_4: # %entry
-; CHECK-V-NEXT: neg a3, a3
 ; CHECK-V-NEXT: neg a4, a4
 ; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: neg a3, a3
 ; CHECK-V-NEXT: beqz a1, .LBB20_7
 ; CHECK-V-NEXT: # %bb.5: # %entry
 ; CHECK-V-NEXT: sgtz a1, a1
@@ -2513,66 +2512,65 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NOV-NEXT: .cfi_offset s0, -16
 ; CHECK-NOV-NEXT: .cfi_offset s1, -24
 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32
-; CHECK-NOV-NEXT: fmv.s fs0, fa1
+; CHECK-NOV-NEXT: fmv.s fs0, fa0
+; CHECK-NOV-NEXT: fmv.s fa0, fa1
 ; CHECK-NOV-NEXT: call __fixsfti
 ; CHECK-NOV-NEXT: mv s0, a0
 ; CHECK-NOV-NEXT: mv s1, a1
 ; CHECK-NOV-NEXT: fmv.s fa0, fs0
 ; CHECK-NOV-NEXT: call __fixsfti
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: li a0, -1
-; CHECK-NOV-NEXT: srli a3, a0, 1
-; CHECK-NOV-NEXT: beqz a1, .LBB21_3
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a3, a2, 1
+; CHECK-NOV-NEXT: beqz s1, .LBB21_3
 ; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: slti a4, a1, 0
-; CHECK-NOV-NEXT: bnez s1, .LBB21_4
+; CHECK-NOV-NEXT: slti a4, s1, 0
+; CHECK-NOV-NEXT: bnez a1, .LBB21_4
 ; CHECK-NOV-NEXT: .LBB21_2:
-; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: sltu a5, a0, a3
 ; CHECK-NOV-NEXT: beqz a5, .LBB21_5
 ; CHECK-NOV-NEXT: j .LBB21_6
 ; CHECK-NOV-NEXT: .LBB21_3:
-; CHECK-NOV-NEXT: sltu a4, a2, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB21_2
+; CHECK-NOV-NEXT: sltu a4, s0, a3
+; CHECK-NOV-NEXT: beqz a1, .LBB21_2
 ; CHECK-NOV-NEXT: .LBB21_4: # %entry
-; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: slti a5, a1, 0
 ; CHECK-NOV-NEXT: bnez a5, .LBB21_6
 ; CHECK-NOV-NEXT: .LBB21_5: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv a0, a3
 ; CHECK-NOV-NEXT: .LBB21_6: # %entry
 ; CHECK-NOV-NEXT: neg a6, a5
 ; CHECK-NOV-NEXT: neg a5, a4
-; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: and a5, a5, s1
 ; CHECK-NOV-NEXT: bnez a4, .LBB21_8
 ; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv s0, a3
 ; CHECK-NOV-NEXT: .LBB21_8: # %entry
-; CHECK-NOV-NEXT: and a4, a6, s1
-; CHECK-NOV-NEXT: slli a1, a0, 63
-; CHECK-NOV-NEXT: beq a5, a0, .LBB21_11
+; CHECK-NOV-NEXT: and a4, a6, a1
+; CHECK-NOV-NEXT: slli a1, a2, 63
+; CHECK-NOV-NEXT: beq a5, a2, .LBB21_11
 ; CHECK-NOV-NEXT: # %bb.9: # %entry
 ; CHECK-NOV-NEXT: slti a3, a5, 0
 ; CHECK-NOV-NEXT: xori a3, a3, 1
-; CHECK-NOV-NEXT: bne a4, a0, .LBB21_12
+; CHECK-NOV-NEXT: bne a4, a2, .LBB21_12
 ; CHECK-NOV-NEXT: .LBB21_10:
-; CHECK-NOV-NEXT: sltu a0, a1, s0
-; CHECK-NOV-NEXT: beqz a0, .LBB21_13
+; CHECK-NOV-NEXT: sltu a2, a1, a0
+; CHECK-NOV-NEXT: beqz a2, .LBB21_13
 ; CHECK-NOV-NEXT: j .LBB21_14
 ; CHECK-NOV-NEXT: .LBB21_11:
-; CHECK-NOV-NEXT: sltu a3, a1, a2
-; CHECK-NOV-NEXT: beq a4, a0, .LBB21_10
+; CHECK-NOV-NEXT: sltu a3, a1, s0
+; CHECK-NOV-NEXT: beq a4, a2, .LBB21_10
 ; CHECK-NOV-NEXT: .LBB21_12: # %entry
-; CHECK-NOV-NEXT: slti a0, a4, 0
-; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: bnez a0, .LBB21_14
+; CHECK-NOV-NEXT: slti a2, a4, 0
+; CHECK-NOV-NEXT: xori a2, a2, 1
+; CHECK-NOV-NEXT: bnez a2, .LBB21_14
 ; CHECK-NOV-NEXT: .LBB21_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
+; CHECK-NOV-NEXT: mv a0, a1
 ; CHECK-NOV-NEXT: .LBB21_14: # %entry
 ; CHECK-NOV-NEXT: bnez a3, .LBB21_16
 ; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv s0, a1
 ; CHECK-NOV-NEXT: .LBB21_16: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: mv a1, s0
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -2597,43 +2595,43 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-V-NEXT: vfmv.f.s fa0, v9
+; CHECK-V-NEXT: vfmv.f.s fa0, v8
 ; CHECK-V-NEXT: call __fixsfti
 ; CHECK-V-NEXT: mv s0, a0
 ; CHECK-V-NEXT: mv s1, a1
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
 ; CHECK-V-NEXT: vfmv.f.s fa0, v8
 ; CHECK-V-NEXT: call __fixsfti
 ; CHECK-V-NEXT: li a2, -1
 ; CHECK-V-NEXT: srli a3, a2, 1
-; CHECK-V-NEXT: beqz a1, .LBB21_3
+; CHECK-V-NEXT: beqz s1, .LBB21_3
 ; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: slti a4, a1, 0
-; CHECK-V-NEXT: bnez s1, .LBB21_4
+; CHECK-V-NEXT: slti a4, s1, 0
+; CHECK-V-NEXT: bnez a1, .LBB21_4
 ; CHECK-V-NEXT: .LBB21_2:
-; CHECK-V-NEXT: sltu a5, s0, a3
+; CHECK-V-NEXT: sltu a5, a0, a3
 ; CHECK-V-NEXT: beqz a5, .LBB21_5
 ; CHECK-V-NEXT: j .LBB21_6
 ; CHECK-V-NEXT: .LBB21_3:
-; CHECK-V-NEXT: sltu a4, a0, a3
-; CHECK-V-NEXT: beqz s1, .LBB21_2
+; CHECK-V-NEXT: sltu a4, s0, a3
+; CHECK-V-NEXT: beqz a1, .LBB21_2
 ; CHECK-V-NEXT: .LBB21_4: # %entry
-; CHECK-V-NEXT: slti a5, s1, 0
+; CHECK-V-NEXT: slti a5, a1, 0
 ; CHECK-V-NEXT: bnez a5, .LBB21_6
 ; CHECK-V-NEXT: .LBB21_5: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: mv a0, a3
 ; CHECK-V-NEXT: .LBB21_6: # %entry
 ; CHECK-V-NEXT: neg a6, a5
 ; CHECK-V-NEXT: neg a5, a4
-; CHECK-V-NEXT: and a5, a5, a1
+; CHECK-V-NEXT: and a5, a5, s1
 ; CHECK-V-NEXT: bnez a4, .LBB21_8
 ; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: mv s0, a3
 ; CHECK-V-NEXT: .LBB21_8: # %entry
-; CHECK-V-NEXT: and a4, a6, s1
+; CHECK-V-NEXT: and a4, a6, a1
 ; CHECK-V-NEXT: slli a1, a2, 63
 ; CHECK-V-NEXT: beq a5, a2, .LBB21_11
 ; CHECK-V-NEXT: # %bb.9: # %entry
@@ -2641,26 +2639,26 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-V-NEXT: xori a3, a3, 1
 ; CHECK-V-NEXT: bne a4, a2, .LBB21_12
 ; CHECK-V-NEXT: .LBB21_10:
-; CHECK-V-NEXT: sltu a2, a1, s0
+; CHECK-V-NEXT: sltu a2, a1, a0
 ; CHECK-V-NEXT: beqz a2, .LBB21_13
 ; CHECK-V-NEXT: j .LBB21_14
 ; CHECK-V-NEXT: .LBB21_11:
-; CHECK-V-NEXT: sltu a3, a1, a0
+; CHECK-V-NEXT: sltu a3, a1, s0
 ; CHECK-V-NEXT: beq a4, a2, .LBB21_10
 ; CHECK-V-NEXT: .LBB21_12: # %entry
 ; CHECK-V-NEXT: slti a2, a4, 0
 ; CHECK-V-NEXT: xori a2, a2, 1
 ; CHECK-V-NEXT: bnez a2, .LBB21_14
 ; CHECK-V-NEXT: .LBB21_13: # %entry
-; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: mv a0, a1
 ; CHECK-V-NEXT: .LBB21_14: # %entry
 ; CHECK-V-NEXT: bnez a3, .LBB21_16
 ; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: mv s0, a1
 ; CHECK-V-NEXT: .LBB21_16: # %entry
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v8, a0
-; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, s0
+; CHECK-V-NEXT: vmv.s.x v9, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
@@ -2693,19 +2691,19 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-NOV-NEXT: .cfi_offset s0, -16
 ; CHECK-NOV-NEXT: .cfi_offset s1, -24
 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32
-; CHECK-NOV-NEXT: fmv.s fs0, fa1
+; CHECK-NOV-NEXT: fmv.s fs0, fa0
+; CHECK-NOV-NEXT: fmv.s fa0, fa1
 ; CHECK-NOV-NEXT: call __fixunssfti
 ; CHECK-NOV-NEXT: mv s0, a0
 ; CHECK-NOV-NEXT: mv s1, a1
 ; CHECK-NOV-NEXT: fmv.s fa0, fs0
 ; CHECK-NOV-NEXT: call __fixunssfti
-; CHECK-NOV-NEXT: snez a1, a1
 ; CHECK-NOV-NEXT: snez a2, s1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a2, a2, s0
+; CHECK-NOV-NEXT: snez a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: mv a0, a2
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: addi a1, a2, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -2730,25 +2728,25 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-V-NEXT: vfmv.f.s fa0, v9
+; CHECK-V-NEXT: vfmv.f.s fa0, v8
 ; CHECK-V-NEXT: call __fixunssfti
 ; CHECK-V-NEXT: mv s0, a0
 ; CHECK-V-NEXT: mv s1, a1
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
 ; CHECK-V-NEXT: vfmv.f.s fa0, v8
 ; CHECK-V-NEXT: call __fixunssfti
-; CHECK-V-NEXT: snez a1, a1
 ; CHECK-V-NEXT: snez a2, s1
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a2, a2, s0
+; CHECK-V-NEXT: snez a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s0
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v8, a0
-; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a2
+; CHECK-V-NEXT: vmv.s.x v9, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
@@ -2790,32 +2788,32 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NOV-NEXT: # %bb.1: # %entry
 ; CHECK-NOV-NEXT: li a2, 1
 ; CHECK-NOV-NEXT: .LBB23_2: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
 ; CHECK-NOV-NEXT: slti a4, s1, 1
+; CHECK-NOV-NEXT: slti a3, a1, 1
 ; CHECK-NOV-NEXT: blez a1, .LBB23_4
 ; CHECK-NOV-NEXT: # %bb.3: # %entry
 ; CHECK-NOV-NEXT: li a1, 1
 ; CHECK-NOV-NEXT: .LBB23_4: # %entry
-; CHECK-NOV-NEXT: neg a4, a4
 ; CHECK-NOV-NEXT: neg a3, a3
 ; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: neg a0, a4
 ; CHECK-NOV-NEXT: beqz a1, .LBB23_7
 ; CHECK-NOV-NEXT: # %bb.5: # %entry
 ; CHECK-NOV-NEXT: sgtz a1, a1
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
 ; CHECK-NOV-NEXT: bnez a2, .LBB23_8
 ; CHECK-NOV-NEXT: .LBB23_6:
-; CHECK-NOV-NEXT: snez a0, a4
+; CHECK-NOV-NEXT: snez a2, a0
 ; CHECK-NOV-NEXT: j .LBB23_9
 ; CHECK-NOV-NEXT: .LBB23_7:
 ; CHECK-NOV-NEXT: snez a1, a3
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
 ; CHECK-NOV-NEXT: beqz a2, .LBB23_6
 ; CHECK-NOV-NEXT: .LBB23_8: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a2
+; CHECK-NOV-NEXT: sgtz a2, a2
 ; CHECK-NOV-NEXT: .LBB23_9: # %entry
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, a4
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
 ; CHECK-NOV-NEXT: neg a1, a1
 ; CHECK-NOV-NEXT: and a1, a1, a3
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -2857,15 +2855,15 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-V-NEXT: # %bb.1: # %entry
 ; CHECK-V-NEXT: li a2, 1
 ; CHECK-V-NEXT: .LBB23_2: # %entry
-; CHECK-V-NEXT: slti a4, a1, 1
 ; CHECK-V-NEXT: slti a3, s1, 1
+; CHECK-V-NEXT: slti a4, a1, 1
 ; CHECK-V-NEXT: blez a1, .LBB23_4
 ; CHECK-V-NEXT: # %bb.3: # %entry
 ; CHECK-V-NEXT: li a1, 1
 ; CHECK-V-NEXT: .LBB23_4: # %entry
-; CHECK-V-NEXT: neg a3, a3
 ; CHECK-V-NEXT: neg a4, a4
 ; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: neg a3, a3
 ; CHECK-V-NEXT: beqz a1, .LBB23_7
 ; CHECK-V-NEXT: # %bb.5: # %entry
 ; CHECK-V-NEXT: sgtz a1, a1
@@ -2920,8 +2918,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NOV-NEXT: .cfi_offset s0, -16
 ; CHECK-NOV-NEXT: .cfi_offset s1, -24
 ; CHECK-NOV-NEXT: .cfi_offset s2, -32
-; CHECK-NOV-NEXT: mv s2, a1
-; CHECK-NOV-NEXT: fmv.w.x fa0, a0
+; CHECK-NOV-NEXT: mv s2, a0
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
 ; CHECK-NOV-NEXT: call __extendhfsf2
 ; CHECK-NOV-NEXT: call __fixsfti
 ; CHECK-NOV-NEXT: mv s0, a0
@@ -2929,60 +2927,58 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2
 ; CHECK-NOV-NEXT: call __extendhfsf2
 ; CHECK-NOV-NEXT: call __fixsfti
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: li a0, -1
-; CHECK-NOV-NEXT: srli a3, a0, 1
-; CHECK-NOV-NEXT: beqz a1, .LBB24_3
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a3, a2, 1
+; CHECK-NOV-NEXT: beqz s1, .LBB24_3
 ; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: slti a4, a1, 0
-; CHECK-NOV-NEXT: bnez s1, .LBB24_4
+; CHECK-NOV-NEXT: slti a4, s1, 0
+; CHECK-NOV-NEXT: bnez a1, .LBB24_4
 ; CHECK-NOV-NEXT: .LBB24_2:
-; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: sltu a5, a0, a3
 ; CHECK-NOV-NEXT: beqz a5, .LBB24_5
 ; CHECK-NOV-NEXT: j .LBB24_6
 ; CHECK-NOV-NEXT: .LBB24_3:
-; CHECK-NOV-NEXT: sltu a4, a2, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB24_2
+; CHECK-NOV-NEXT: sltu a4, s0, a3
+; CHECK-NOV-NEXT: beqz a1, .LBB24_2
 ; CHECK-NOV-NEXT: .LBB24_4: # %entry
-; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: slti a5, a1, 0
 ; CHECK-NOV-NEXT: bnez a5, .LBB24_6
 ; CHECK-NOV-NEXT: .LBB24_5: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv a0, a3
 ; CHECK-NOV-NEXT: .LBB24_6: # %entry
 ; CHECK-NOV-NEXT: neg a6, a5
 ; CHECK-NOV-NEXT: neg a5, a4
-; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: and a5, a5, s1
 ; CHECK-NOV-NEXT: bnez a4, .LBB24_8
 ; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv s0, a3
 ; CHECK-NOV-NEXT: .LBB24_8: # %entry
-; CHECK-NOV-NEXT: and a4, a6, s1
-; CHECK-NOV-NEXT: slli a1, a0, 63
-; CHECK-NOV-NEXT: beq a5, a0, .LBB24_11
+; CHECK-NOV-NEXT: and a4, a6, a1
+; CHECK-NOV-NEXT: slli a1, a2, 63
+; CHECK-NOV-NEXT: beq a5, a2, .LBB24_11
 ; CHECK-NOV-NEXT: # %bb.9: # %entry
 ; CHECK-NOV-NEXT: slti a3, a5, 0
 ; CHECK-NOV-NEXT: xori a3, a3, 1
-; CHECK-NOV-NEXT: bne a4, a0, .LBB24_12
+; CHECK-NOV-NEXT: bne a4, a2, .LBB24_12
 ; CHECK-NOV-NEXT: .LBB24_10:
-; CHECK-NOV-NEXT: sltu a0, a1, s0
-; CHECK-NOV-NEXT: beqz a0, .LBB24_13
+; CHECK-NOV-NEXT: sltu a2, a1, a0
+; CHECK-NOV-NEXT: beqz a2, .LBB24_13
 ; CHECK-NOV-NEXT: j .LBB24_14
 ; CHECK-NOV-NEXT: .LBB24_11:
-; CHECK-NOV-NEXT: sltu a3, a1, a2
-; CHECK-NOV-NEXT: beq a4, a0, .LBB24_10
+; CHECK-NOV-NEXT: sltu a3, a1, s0
+; CHECK-NOV-NEXT: beq a4, a2, .LBB24_10
 ; CHECK-NOV-NEXT: .LBB24_12: # %entry
-; CHECK-NOV-NEXT: slti a0, a4, 0
-; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: bnez a0, .LBB24_14
+; CHECK-NOV-NEXT: slti a2, a4, 0
+; CHECK-NOV-NEXT: xori a2, a2, 1
+; CHECK-NOV-NEXT: bnez a2, .LBB24_14
 ; CHECK-NOV-NEXT: .LBB24_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
+; CHECK-NOV-NEXT: mv a0, a1
 ; CHECK-NOV-NEXT: .LBB24_14: # %entry
 ; CHECK-NOV-NEXT: bnez a3, .LBB24_16
 ; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv s0, a1
 ; CHECK-NOV-NEXT: .LBB24_16: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: mv a1, s0
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -3002,8 +2998,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: .cfi_offset s2, -32
-; CHECK-V-NEXT: mv s2, a1
-; CHECK-V-NEXT: fmv.w.x fa0, a0
+; CHECK-V-NEXT: mv s2, a0
+; CHECK-V-NEXT: fmv.w.x fa0, a1
 ; CHECK-V-NEXT: call __extendhfsf2
 ; CHECK-V-NEXT: call __fixsfti
 ; CHECK-V-NEXT: mv s0, a0
@@ -3013,31 +3009,31 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-V-NEXT: call __fixsfti
 ; CHECK-V-NEXT: li a2, -1
 ; CHECK-V-NEXT: srli a3, a2, 1
-; CHECK-V-NEXT: beqz a1, .LBB24_3
+; CHECK-V-NEXT: beqz s1, .LBB24_3
 ; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: slti a4, a1, 0
-; CHECK-V-NEXT: bnez s1, .LBB24_4
+; CHECK-V-NEXT: slti a4, s1, 0
+; CHECK-V-NEXT: bnez a1, .LBB24_4
 ; CHECK-V-NEXT: .LBB24_2:
-; CHECK-V-NEXT: sltu a5, s0, a3
+; CHECK-V-NEXT: sltu a5, a0, a3
 ; CHECK-V-NEXT: beqz a5, .LBB24_5
 ; CHECK-V-NEXT: j .LBB24_6
 ; CHECK-V-NEXT: .LBB24_3:
-; CHECK-V-NEXT: sltu a4, a0, a3
-; CHECK-V-NEXT: beqz s1, .LBB24_2
+; CHECK-V-NEXT: sltu a4, s0, a3
+; CHECK-V-NEXT: beqz a1, .LBB24_2
 ; CHECK-V-NEXT: .LBB24_4: # %entry
-; CHECK-V-NEXT: slti a5, s1, 0
+; CHECK-V-NEXT: slti a5, a1, 0
 ; CHECK-V-NEXT: bnez a5, .LBB24_6
 ; CHECK-V-NEXT: .LBB24_5: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: mv a0, a3
 ; CHECK-V-NEXT: .LBB24_6: # %entry
 ; CHECK-V-NEXT: neg a6, a5
 ; CHECK-V-NEXT: neg a5, a4
-; CHECK-V-NEXT: and a5, a5, a1
+; CHECK-V-NEXT: and a5, a5, s1
 ; CHECK-V-NEXT: bnez a4, .LBB24_8
 ; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: mv s0, a3
 ; CHECK-V-NEXT: .LBB24_8: # %entry
-; CHECK-V-NEXT: and a4, a6, s1
+; CHECK-V-NEXT: and a4, a6, a1
 ; CHECK-V-NEXT: slli a1, a2, 63
 ; CHECK-V-NEXT: beq a5, a2, .LBB24_11
 ; CHECK-V-NEXT: # %bb.9: # %entry
@@ -3045,26 +3041,26 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-V-NEXT: xori a3, a3, 1
 ; CHECK-V-NEXT: bne a4, a2, .LBB24_12
 ; CHECK-V-NEXT: .LBB24_10:
-; CHECK-V-NEXT: sltu a2, a1, s0
+; CHECK-V-NEXT: sltu a2, a1, a0
 ; CHECK-V-NEXT: beqz a2, .LBB24_13
 ; CHECK-V-NEXT: j .LBB24_14
 ; CHECK-V-NEXT: .LBB24_11:
-; CHECK-V-NEXT: sltu a3, a1, a0
+; CHECK-V-NEXT: sltu a3, a1, s0
 ; CHECK-V-NEXT: beq a4, a2, .LBB24_10
 ; CHECK-V-NEXT: .LBB24_12: # %entry
 ; CHECK-V-NEXT: slti a2, a4, 0
 ; CHECK-V-NEXT: xori a2, a2, 1
 ; CHECK-V-NEXT: bnez a2, .LBB24_14
 ; CHECK-V-NEXT: .LBB24_13: # %entry
-; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: mv a0, a1
 ; CHECK-V-NEXT: .LBB24_14: # %entry
 ; CHECK-V-NEXT: bnez a3, .LBB24_16
 ; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: mv s0, a1
 ; CHECK-V-NEXT: .LBB24_16: # %entry
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v9, a0
-; CHECK-V-NEXT: vmv.s.x v8, s0
+; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3095,8 +3091,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NOV-NEXT: .cfi_offset s0, -16
 ; CHECK-NOV-NEXT: .cfi_offset s1, -24
 ; CHECK-NOV-NEXT: .cfi_offset s2, -32
-; CHECK-NOV-NEXT: mv s0, a1
-; CHECK-NOV-NEXT: fmv.w.x fa0, a0
+; CHECK-NOV-NEXT: mv s0, a0
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
 ; CHECK-NOV-NEXT: call __extendhfsf2
 ; CHECK-NOV-NEXT: call __fixunssfti
 ; CHECK-NOV-NEXT: mv s1, a0
@@ -3104,13 +3100,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NOV-NEXT: fmv.w.x fa0, s0
 ; CHECK-NOV-NEXT: call __extendhfsf2
 ; CHECK-NOV-NEXT: call __fixunssfti
-; CHECK-NOV-NEXT: snez a1, a1
 ; CHECK-NOV-NEXT: snez a2, s2
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a2, a2, s1
+; CHECK-NOV-NEXT: snez a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: mv a0, a2
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: addi a1, a2, -1
+; CHECK-NOV-NEXT: and a1, a1, s1
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -3130,8 +3125,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: .cfi_offset s2, -32
-; CHECK-V-NEXT: mv s0, a1
-; CHECK-V-NEXT: fmv.w.x fa0, a0
+; CHECK-V-NEXT: mv s0, a0
+; CHECK-V-NEXT: fmv.w.x fa0, a1
 ; CHECK-V-NEXT: call __extendhfsf2
 ; CHECK-V-NEXT: call __fixunssfti
 ; CHECK-V-NEXT: mv s1, a0
@@ -3139,15 +3134,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-V-NEXT: fmv.w.x fa0, s0
 ; CHECK-V-NEXT: call __extendhfsf2
 ; CHECK-V-NEXT: call __fixunssfti
-; CHECK-V-NEXT: snez a1, a1
 ; CHECK-V-NEXT: snez a2, s2
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a2, a2, s1
+; CHECK-V-NEXT: snez a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s1
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v9, a0
-; CHECK-V-NEXT: vmv.s.x v8, a2
+; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3190,32 +3185,32 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NOV-NEXT: # %bb.1: # %entry
 ; CHECK-NOV-NEXT: li a2, 1
 ; CHECK-NOV-NEXT: .LBB26_2: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
 ; CHECK-NOV-NEXT: slti a4, s1, 1
+; CHECK-NOV-NEXT: slti a3, a1, 1
 ; CHECK-NOV-NEXT: blez a1, .LBB26_4
 ; CHECK-NOV-NEXT: # %bb.3: # %entry
 ; CHECK-NOV-NEXT: li a1, 1
 ; CHECK-NOV-NEXT: .LBB26_4: # %entry
-; CHECK-NOV-NEXT: neg a4, a4
 ; CHECK-NOV-NEXT: neg a3, a3
 ; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: neg a0, a4
 ; CHECK-NOV-NEXT: beqz a1, .LBB26_7
 ; CHECK-NOV-NEXT: # %bb.5: # %entry
 ; CHECK-NOV-NEXT: sgtz a1, a1
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
 ; CHECK-NOV-NEXT: bnez a2, .LBB26_8
 ; CHECK-NOV-NEXT: .LBB26_6:
-; CHECK-NOV-NEXT: snez a0, a4
+; CHECK-NOV-NEXT: snez a2, a0
 ; CHECK-NOV-NEXT: j .LBB26_9
 ; CHECK-NOV-NEXT: .LBB26_7:
 ; CHECK-NOV-NEXT: snez a1, a3
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
 ; CHECK-NOV-NEXT: beqz a2, .LBB26_6
 ; CHECK-NOV-NEXT: .LBB26_8: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a2
+; CHECK-NOV-NEXT: sgtz a2, a2
 ; CHECK-NOV-NEXT: .LBB26_9: # %entry
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, a4
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
 ; CHECK-NOV-NEXT: neg a1, a1
 ; CHECK-NOV-NEXT: and a1, a1, a3
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -3251,15 +3246,15 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-V-NEXT: # %bb.1: # %entry
 ; CHECK-V-NEXT: li a2, 1
 ; CHECK-V-NEXT: .LBB26_2: # %entry
-; CHECK-V-NEXT: slti a4, a1, 1
 ; CHECK-V-NEXT: slti a3, s1, 1
+; CHECK-V-NEXT: slti a4, a1, 1
 ; CHECK-V-NEXT: blez a1, .LBB26_4
 ; CHECK-V-NEXT: # %bb.3: # %entry
 ; CHECK-V-NEXT: li a1, 1
 ; CHECK-V-NEXT: .LBB26_4: # %entry
-; CHECK-V-NEXT: neg a3, a3
 ; CHECK-V-NEXT: neg a4, a4
 ; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: neg a3, a3
 ; CHECK-V-NEXT: beqz a1, .LBB26_7
 ; CHECK-V-NEXT: # %bb.5: # %entry
 ; CHECK-V-NEXT: sgtz a1, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
index 81076e41a7cb76..122ac13cb25731 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
@@ -13,18 +13,18 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: lw a0, 0(a0)
 ; RV32-NEXT: srli a2, a0, 16
-; RV32-NEXT: srli a3, a0, 8
-; RV32-NEXT: slli a4, a0, 16
-; RV32-NEXT: srai a4, a4, 24
+; RV32-NEXT: slli a3, a0, 16
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: srai a3, a3, 24
 ; RV32-NEXT: slli a5, a0, 24
 ; RV32-NEXT: srai a5, a5, 24
 ; RV32-NEXT: slli a6, a0, 8
 ; RV32-NEXT: srai a6, a6, 24
 ; RV32-NEXT: sgtz a6, a6
 ; RV32-NEXT: sgtz a5, a5
-; RV32-NEXT: sgtz a4, a4
-; RV32-NEXT: neg a4, a4
-; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: sgtz a3, a3
+; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a3, a3, a4
 ; RV32-NEXT: slli a3, a3, 8
 ; RV32-NEXT: neg a4, a5
 ; RV32-NEXT: and a0, a4, a0
@@ -39,19 +39,19 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) {
 ; RV64-LABEL: vec3_setcc_crash:
 ; RV64: # %bb.0:
 ; RV64-NEXT: lw a0, 0(a0)
-; RV64-NEXT: srli a2, a0, 16
-; RV64-NEXT: srli a3, a0, 8
-; RV64-NEXT: slli a4, a0, 48
-; RV64-NEXT: srai a4, a4, 56
+; RV64-NEXT: srliw a2, a0, 16
+; RV64-NEXT: slli a3, a0, 48
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: srai a3, a3, 56
 ; RV64-NEXT: slli a5, a0, 56
 ; RV64-NEXT: srai a5, a5, 56
 ; RV64-NEXT: slli a6, a0, 40
 ; RV64-NEXT: srai a6, a6, 56
 ; RV64-NEXT: sgtz a6, a6
 ; RV64-NEXT: sgtz a5, a5
-; RV64-NEXT: sgtz a4, a4
-; RV64-NEXT: negw a4, a4
-; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: sgtz a3, a3
+; RV64-NEXT: negw a3, a3
+; RV64-NEXT: and a3, a3, a4
 ; RV64-NEXT: slli a3, a3, 8
 ; RV64-NEXT: negw a4, a5
 ; RV64-NEXT: and a0, a4, a0
diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
index de36bcdb910609..069b2febc334d2 100644
--- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
@@ -422,7 +422,8 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
 ; RV32I-NEXT: lui a1, 1048560
 ; RV32I-NEXT: addi a1, a1, -1
 ; RV32I-NEXT: sltu a1, a1, a2
-; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
 ; RV32I-NEXT: and a0, a0, a1
 ; RV32I-NEXT: ret
 ;
@@ -462,7 +463,8 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
 ; RV32I-NEXT: addi a2, a0, -128
 ; RV32I-NEXT: sltu a0, a2, a0
 ; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
 ; RV32I-NEXT: sltiu a1, a2, -256
 ; RV32I-NEXT: xori a1, a1, 1
 ; RV32I-NEXT: and a0, a0, a1
@@ -691,7 +693,8 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
 ; RV32I-NEXT: addi a2, a0, 128
 ; RV32I-NEXT: sltu a0, a2, a0
 ; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
 ; RV32I-NEXT: sltiu a1, a2, 256
 ; RV32I-NEXT: and a0, a0, a1
 ; RV32I-NEXT: ret
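
Note on the recurring codegen change above (an explanatory aside, not part of the patch): in each updated RV32I test, seqz a0, a0 becomes snez a0, a0 followed by addi a0, a0, -1 and an and. That is the mask form of a branchless select: the 0/1 compare result is decremented into an all-zeros or all-ones mask and ANDed with the other operand, rather than the boolean being ANDed directly. A minimal C sketch of the identity follows; the function name and the driver are illustrative only, not taken from the patch.

#include <assert.h>
#include <stdint.h>

/* For a boolean c (0 or 1), c - 1 is 0 when c is 1 and all-ones when c is 0,
 * so (c - 1) & y computes (c ? 0 : y) without a branch. This mirrors the
 * snez/addi/and sequences in the updated CHECK lines above. */
static uint64_t select_zero_or_y(uint64_t c, uint64_t y) {
    uint64_t mask = c - 1; /* 0 if c == 1, all-ones if c == 0 */
    return mask & y;
}

int main(void) {
    assert(select_zero_or_y(1, 0x1234) == 0);      /* c set: result is 0 */
    assert(select_zero_or_y(0, 0x1234) == 0x1234); /* c clear: result is y */
    return 0;
}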