Skip to content

Commit

Permalink
[RISCV] Insert a freeze before converting select to AND/OR. (llvm#84232)
Browse files Browse the repository at this point in the history
Select blocks poison, but AND/OR do not. We need to insert a freeze
to block poison propagation.

This creates suboptimal codegen which I will try to fix with other
patches. I'm prioritizing the correctness fix since we have 2 bug reports.

Fixes llvm#84200 and llvm#84350
  • Loading branch information
topperc authored Mar 7, 2024
1 parent 14171b8 commit 909ab0e
Show file tree
Hide file tree
Showing 21 changed files with 2,587 additions and 2,356 deletions.
14 changes: 7 additions & 7 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, -1) -> (c-1) | y
if (isAllOnesConstant(FalseV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
}

// (select c, 0, y) -> (c-1) & y
if (isNullConstant(TrueV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, 0) -> -c & y
if (isNullConstant(FalseV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
}
}

Expand All @@ -7290,13 +7290,13 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select !x, x, y) -> x & y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
FalseV);
DAG.getFreeze(FalseV));
}
// (select x, y, x) -> x & y
// (select !x, y, x) -> x | y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
FalseV);
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
DAG.getFreeze(TrueV), FalseV);
}
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/RISCV/alu64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind {
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
; RV32I-NEXT: sltiu a0, a0, 3
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
; RV32IA-NEXT: addi a2, a2, 1
; RV32IA-NEXT: sltu a4, a3, a1
; RV32IA-NEXT: neg a4, a4
; RV32IA-NEXT: and a4, a4, a2
; RV32IA-NEXT: addi a4, a2, 1
; RV32IA-NEXT: sltu a2, a2, a1
; RV32IA-NEXT: neg a2, a2
; RV32IA-NEXT: and a4, a2, a4
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down Expand Up @@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
; RV64IA-NEXT: addi a2, a2, 1
; RV64IA-NEXT: sltu a4, a3, a1
; RV64IA-NEXT: neg a4, a4
; RV64IA-NEXT: and a4, a4, a2
; RV64IA-NEXT: addi a4, a2, 1
; RV64IA-NEXT: sltu a2, a2, a1
; RV64IA-NEXT: neg a2, a2
; RV64IA-NEXT: and a4, a2, a4
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down
169 changes: 96 additions & 73 deletions llvm/test/CodeGen/RISCV/bfloat-convert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
; RV32IZFBFMIN: # %bb.0: # %start
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: addi sp, sp, -32
; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s1, s0
; RV32IZFBFMIN-NEXT: lui a0, 913408
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s3, s2
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
; RV32IZFBFMIN-NEXT: call __fixsfdi
; RV32IZFBFMIN-NEXT: and a0, s3, a0
; RV32IZFBFMIN-NEXT: or a0, s1, a0
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a2, a2
; RV32IZFBFMIN-NEXT: lui a4, 524288
; RV32IZFBFMIN-NEXT: lui a2, 524288
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
; RV32IZFBFMIN-NEXT: li a5, 1
; RV32IZFBFMIN-NEXT: lui a3, 524288
; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
; RV32IZFBFMIN-NEXT: mv a2, a1
; RV32IZFBFMIN-NEXT: mv a3, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
; RV32IZFBFMIN-NEXT: and a0, a2, a0
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
; RV32IZFBFMIN-NEXT: addi a2, a4, -1
; RV32IZFBFMIN-NEXT: addi a3, a4, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a4, a1
; RV32IZFBFMIN-NEXT: and a1, a4, a2
; RV32IZFBFMIN-NEXT: neg a2, a3
; RV32IZFBFMIN-NEXT: neg a3, s0
; RV32IZFBFMIN-NEXT: and a0, a3, a0
; RV32IZFBFMIN-NEXT: or a0, a2, a0
; RV32IZFBFMIN-NEXT: and a0, a4, a0
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
; RV32IZFBFMIN-NEXT: and a1, a2, a3
; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 32
; RV32IZFBFMIN-NEXT: ret
;
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
; R32IDZFBFMIN: # %bb.0: # %start
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: addi sp, sp, -32
; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s1, s0
; R32IDZFBFMIN-NEXT: lui a0, 913408
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s3, s2
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
; R32IDZFBFMIN-NEXT: call __fixsfdi
; R32IDZFBFMIN-NEXT: and a0, s3, a0
; R32IDZFBFMIN-NEXT: or a0, s1, a0
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a2, a2
; R32IDZFBFMIN-NEXT: lui a4, 524288
; R32IDZFBFMIN-NEXT: lui a2, 524288
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
; R32IDZFBFMIN-NEXT: li a5, 1
; R32IDZFBFMIN-NEXT: lui a3, 524288
; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
; R32IDZFBFMIN-NEXT: mv a2, a1
; R32IDZFBFMIN-NEXT: mv a3, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
; R32IDZFBFMIN-NEXT: and a0, a2, a0
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
; R32IDZFBFMIN-NEXT: addi a2, a4, -1
; R32IDZFBFMIN-NEXT: addi a3, a4, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a4, a1
; R32IDZFBFMIN-NEXT: and a1, a4, a2
; R32IDZFBFMIN-NEXT: neg a2, a3
; R32IDZFBFMIN-NEXT: neg a3, s0
; R32IDZFBFMIN-NEXT: and a0, a3, a0
; R32IDZFBFMIN-NEXT: or a0, a2, a0
; R32IDZFBFMIN-NEXT: and a0, a4, a0
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: and a1, a2, a3
; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
; R32IDZFBFMIN-NEXT: addi sp, sp, 32
; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_l_bf16_sat:
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-NEXT: addi sp, sp, -32
; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: slli a0, a0, 16
; RV32ID-NEXT: fmv.w.x fs0, a0
; RV32ID-NEXT: flt.s s0, fa5, fs0
; RV32ID-NEXT: neg s1, s0
; RV32ID-NEXT: lui a0, 913408
; RV32ID-NEXT: fmv.w.x fa5, a0
; RV32ID-NEXT: fle.s s0, fa5, fs0
; RV32ID-NEXT: fle.s s2, fa5, fs0
; RV32ID-NEXT: neg s3, s2
; RV32ID-NEXT: fmv.s fa0, fs0
; RV32ID-NEXT: call __fixsfdi
; RV32ID-NEXT: and a0, s3, a0
; RV32ID-NEXT: or a0, s1, a0
; RV32ID-NEXT: feq.s a2, fs0, fs0
; RV32ID-NEXT: neg a2, a2
; RV32ID-NEXT: lui a4, 524288
; RV32ID-NEXT: lui a2, 524288
; RV32ID-NEXT: beqz s0, .LBB10_2
; RV32ID-NEXT: li a5, 1
; RV32ID-NEXT: lui a3, 524288
; RV32ID-NEXT: bne s2, a5, .LBB10_2
; RV32ID-NEXT: # %bb.1: # %start
; RV32ID-NEXT: mv a2, a1
; RV32ID-NEXT: mv a3, a1
; RV32ID-NEXT: .LBB10_2: # %start
; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32ID-NEXT: flt.s a3, fa5, fs0
; RV32ID-NEXT: beqz a3, .LBB10_4
; RV32ID-NEXT: and a0, a2, a0
; RV32ID-NEXT: beqz s0, .LBB10_4
; RV32ID-NEXT: # %bb.3:
; RV32ID-NEXT: addi a2, a4, -1
; RV32ID-NEXT: addi a3, a4, -1
; RV32ID-NEXT: .LBB10_4: # %start
; RV32ID-NEXT: feq.s a1, fs0, fs0
; RV32ID-NEXT: neg a4, a1
; RV32ID-NEXT: and a1, a4, a2
; RV32ID-NEXT: neg a2, a3
; RV32ID-NEXT: neg a3, s0
; RV32ID-NEXT: and a0, a3, a0
; RV32ID-NEXT: or a0, a2, a0
; RV32ID-NEXT: and a0, a4, a0
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-NEXT: and a1, a2, a3
; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32ID-NEXT: addi sp, sp, 16
; RV32ID-NEXT: addi sp, sp, 32
; RV32ID-NEXT: ret
;
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
Expand Down Expand Up @@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN-NEXT: neg s0, a0
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
; CHECK32ZFBFMIN-NEXT: neg s1, a0
; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
Expand All @@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: neg s0, a0
; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: fle.s a0, fa5, fa0
; RV32ID-NEXT: neg s1, a0
; RV32ID-NEXT: xori a0, a0, 1
; RV32ID-NEXT: addi s1, a0, -1
; RV32ID-NEXT: call __fixunssfdi
; RV32ID-NEXT: and a0, s1, a0
; RV32ID-NEXT: or a0, s0, a0
Expand Down
Loading

0 comments on commit 909ab0e

Please sign in to comment.