Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Insert a freeze before converting select to AND/OR. #84232

Merged: 5 commits, Mar 7, 2024
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, -1) -> (c-1) | y
if (isAllOnesConstant(FalseV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
}

// (select c, 0, y) -> (c-1) & y
if (isNullConstant(TrueV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, 0) -> -c & y
if (isNullConstant(FalseV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
}
}

Expand All @@ -7290,13 +7290,13 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select !x, x, y) -> x & y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
FalseV);
DAG.getFreeze(FalseV));
}
// (select x, y, x) -> x & y
// (select !x, y, x) -> x | y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
FalseV);
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
DAG.getFreeze(TrueV), FalseV);
}
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/RISCV/alu64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind {
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
; RV32I-NEXT: sltiu a0, a0, 3
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
; RV32IA-NEXT: addi a2, a2, 1
; RV32IA-NEXT: sltu a4, a3, a1
; RV32IA-NEXT: neg a4, a4
; RV32IA-NEXT: and a4, a4, a2
; RV32IA-NEXT: addi a4, a2, 1
; RV32IA-NEXT: sltu a2, a2, a1
; RV32IA-NEXT: neg a2, a2
; RV32IA-NEXT: and a4, a2, a4
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down Expand Up @@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
; RV64IA-NEXT: addi a2, a2, 1
; RV64IA-NEXT: sltu a4, a3, a1
; RV64IA-NEXT: neg a4, a4
; RV64IA-NEXT: and a4, a4, a2
; RV64IA-NEXT: addi a4, a2, 1
; RV64IA-NEXT: sltu a2, a2, a1
; RV64IA-NEXT: neg a2, a2
; RV64IA-NEXT: and a4, a2, a4
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down
169 changes: 96 additions & 73 deletions llvm/test/CodeGen/RISCV/bfloat-convert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
; RV32IZFBFMIN: # %bb.0: # %start
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: addi sp, sp, -32
; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s1, s0
; RV32IZFBFMIN-NEXT: lui a0, 913408
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s3, s2
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
; RV32IZFBFMIN-NEXT: call __fixsfdi
; RV32IZFBFMIN-NEXT: and a0, s3, a0
; RV32IZFBFMIN-NEXT: or a0, s1, a0
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a2, a2
; RV32IZFBFMIN-NEXT: lui a4, 524288
; RV32IZFBFMIN-NEXT: lui a2, 524288
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
; RV32IZFBFMIN-NEXT: li a5, 1
; RV32IZFBFMIN-NEXT: lui a3, 524288
; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
; RV32IZFBFMIN-NEXT: mv a2, a1
; RV32IZFBFMIN-NEXT: mv a3, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
; RV32IZFBFMIN-NEXT: and a0, a2, a0
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
; RV32IZFBFMIN-NEXT: addi a2, a4, -1
; RV32IZFBFMIN-NEXT: addi a3, a4, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a4, a1
; RV32IZFBFMIN-NEXT: and a1, a4, a2
; RV32IZFBFMIN-NEXT: neg a2, a3
; RV32IZFBFMIN-NEXT: neg a3, s0
; RV32IZFBFMIN-NEXT: and a0, a3, a0
; RV32IZFBFMIN-NEXT: or a0, a2, a0
; RV32IZFBFMIN-NEXT: and a0, a4, a0
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
; RV32IZFBFMIN-NEXT: and a1, a2, a3
; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 32
; RV32IZFBFMIN-NEXT: ret
;
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
; R32IDZFBFMIN: # %bb.0: # %start
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: addi sp, sp, -32
; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s1, s0
; R32IDZFBFMIN-NEXT: lui a0, 913408
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s3, s2
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
; R32IDZFBFMIN-NEXT: call __fixsfdi
; R32IDZFBFMIN-NEXT: and a0, s3, a0
; R32IDZFBFMIN-NEXT: or a0, s1, a0
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a2, a2
; R32IDZFBFMIN-NEXT: lui a4, 524288
; R32IDZFBFMIN-NEXT: lui a2, 524288
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
; R32IDZFBFMIN-NEXT: li a5, 1
; R32IDZFBFMIN-NEXT: lui a3, 524288
; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
; R32IDZFBFMIN-NEXT: mv a2, a1
; R32IDZFBFMIN-NEXT: mv a3, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
; R32IDZFBFMIN-NEXT: and a0, a2, a0
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
; R32IDZFBFMIN-NEXT: addi a2, a4, -1
; R32IDZFBFMIN-NEXT: addi a3, a4, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a4, a1
; R32IDZFBFMIN-NEXT: and a1, a4, a2
; R32IDZFBFMIN-NEXT: neg a2, a3
; R32IDZFBFMIN-NEXT: neg a3, s0
; R32IDZFBFMIN-NEXT: and a0, a3, a0
; R32IDZFBFMIN-NEXT: or a0, a2, a0
; R32IDZFBFMIN-NEXT: and a0, a4, a0
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: and a1, a2, a3
; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
; R32IDZFBFMIN-NEXT: addi sp, sp, 32
; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_l_bf16_sat:
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-NEXT: addi sp, sp, -32
; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: slli a0, a0, 16
; RV32ID-NEXT: fmv.w.x fs0, a0
; RV32ID-NEXT: flt.s s0, fa5, fs0
; RV32ID-NEXT: neg s1, s0
; RV32ID-NEXT: lui a0, 913408
; RV32ID-NEXT: fmv.w.x fa5, a0
; RV32ID-NEXT: fle.s s0, fa5, fs0
; RV32ID-NEXT: fle.s s2, fa5, fs0
; RV32ID-NEXT: neg s3, s2
; RV32ID-NEXT: fmv.s fa0, fs0
; RV32ID-NEXT: call __fixsfdi
; RV32ID-NEXT: and a0, s3, a0
; RV32ID-NEXT: or a0, s1, a0
; RV32ID-NEXT: feq.s a2, fs0, fs0
; RV32ID-NEXT: neg a2, a2
; RV32ID-NEXT: lui a4, 524288
; RV32ID-NEXT: lui a2, 524288
; RV32ID-NEXT: beqz s0, .LBB10_2
; RV32ID-NEXT: li a5, 1
; RV32ID-NEXT: lui a3, 524288
; RV32ID-NEXT: bne s2, a5, .LBB10_2
; RV32ID-NEXT: # %bb.1: # %start
; RV32ID-NEXT: mv a2, a1
; RV32ID-NEXT: mv a3, a1
; RV32ID-NEXT: .LBB10_2: # %start
; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32ID-NEXT: flt.s a3, fa5, fs0
; RV32ID-NEXT: beqz a3, .LBB10_4
; RV32ID-NEXT: and a0, a2, a0
; RV32ID-NEXT: beqz s0, .LBB10_4
; RV32ID-NEXT: # %bb.3:
; RV32ID-NEXT: addi a2, a4, -1
; RV32ID-NEXT: addi a3, a4, -1
; RV32ID-NEXT: .LBB10_4: # %start
; RV32ID-NEXT: feq.s a1, fs0, fs0
; RV32ID-NEXT: neg a4, a1
; RV32ID-NEXT: and a1, a4, a2
; RV32ID-NEXT: neg a2, a3
; RV32ID-NEXT: neg a3, s0
; RV32ID-NEXT: and a0, a3, a0
; RV32ID-NEXT: or a0, a2, a0
; RV32ID-NEXT: and a0, a4, a0
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-NEXT: and a1, a2, a3
; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32ID-NEXT: addi sp, sp, 16
; RV32ID-NEXT: addi sp, sp, 32
; RV32ID-NEXT: ret
;
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
Expand Down Expand Up @@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN-NEXT: neg s0, a0
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
; CHECK32ZFBFMIN-NEXT: neg s1, a0
; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
Expand All @@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: neg s0, a0
; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: fle.s a0, fa5, fa0
; RV32ID-NEXT: neg s1, a0
; RV32ID-NEXT: xori a0, a0, 1
; RV32ID-NEXT: addi s1, a0, -1
; RV32ID-NEXT: call __fixunssfdi
; RV32ID-NEXT: and a0, s1, a0
; RV32ID-NEXT: or a0, s0, a0
Expand Down
Loading
Loading