Skip to content

Commit

Permalink
add v_sat_pk pattern for fake16, add test cases for GFX12, merge main
Browse files — browse the repository at this point in the history
  • Loading branch information
Shoreshen committed Jan 15, 2025
1 parent 582c56a commit 5319c9f
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 49 deletions.
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -3318,12 +3318,14 @@ multiclass V_SAT_PK_Pat<Instruction inst> {

def: GCNPatIgnoreCopies<
(i16 (conc_lo_v2i16_i16 (clamp_v2i16_u8 v2i16:$src))),
(inst VGPR_32:$src)
(inst VRegSrc_32:$src)
>;
}

let OtherPredicates = [NotHasTrue16BitInsts] in
defm : V_SAT_PK_Pat<V_SAT_PK_U8_I16_e64>;
let OtherPredicates = [NotHasTrue16BitInsts] in {
defm : V_SAT_PK_Pat<V_SAT_PK_U8_I16_e64>;
} // End OtherPredicates = [NotHasTrue16BitInsts]
defm : V_SAT_PK_Pat<V_SAT_PK_U8_I16_fake16_e64>;

// With multiple uses of the shift, this will duplicate the shift and
// increase register pressure.
Expand Down
98 changes: 52 additions & 46 deletions llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,19 @@ define i16 @basic_smax_smin_bit_or(i16 %src0, i16 %src1) {
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: basic_smax_smin_bit_or:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
; SDAG-GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; SDAG-GFX12-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: basic_smax_smin_bit_or:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -926,6 +939,20 @@ define i16 @basic_umax_umin_bit_or(i16 %src0, i16 %src1) {
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: basic_umax_umin_bit_or:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
; SDAG-GFX12-NEXT: v_min_u16 v1, 0xff, v1
; SDAG-GFX12-NEXT: v_min_u16 v0, 0xff, v0
; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX12-NEXT: v_lshlrev_b16 v1, 8, v1
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: basic_umax_umin_bit_or:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -992,13 +1019,10 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
; SDAG-GFX11-LABEL: basic_smax_smin_vec_cast:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: basic_smax_smin_vec_cast:
Expand Down Expand Up @@ -1041,11 +1065,10 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
; GISEL-GFX11-LABEL: basic_smax_smin_vec_cast:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: basic_smax_smin_vec_cast:
Expand Down Expand Up @@ -1117,6 +1140,19 @@ define i16 @basic_smax_smin_bit_shl(i16 %src0, i16 %src1) {
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: basic_smax_smin_bit_shl:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
; SDAG-GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; SDAG-GFX12-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: basic_smax_smin_bit_shl:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -1185,13 +1221,7 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
; SDAG-GFX11-LABEL: basic_smax_smin_vec_input:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
; SDAG-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: basic_smax_smin_vec_input:
Expand Down Expand Up @@ -1232,16 +1262,7 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
; GISEL-GFX11-LABEL: basic_smax_smin_vec_input:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_pk_max_i16 v0, 0, v0
; GISEL-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: basic_smax_smin_vec_input:
Expand Down Expand Up @@ -1296,13 +1317,7 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
; SDAG-GFX11-LABEL: basic_smax_smin_vec_input_rev:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
; SDAG-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX12-LABEL: basic_smax_smin_vec_input_rev:
Expand Down Expand Up @@ -1342,16 +1357,7 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
; GISEL-GFX11-LABEL: basic_smax_smin_vec_input_rev:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_pk_max_i16 v0, 0, v0
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
; GISEL-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: basic_smax_smin_vec_input_rev:
Expand Down

0 comments on commit 5319c9f

Please sign in to comment.