Skip to content

Commit

Permalink
Automerge: [X86][AVX10.2-BF16] Update VCOMISBF16 intrinsics and instr…
Browse files Browse the repository at this point in the history
…uctions (#123307)

- Add `I` to intrinsics and instructions
- Add `_` before sbf16 in intrinsics

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
  • Loading branch information
phoebewang authored and github-actions[bot] committed Jan 24, 2025
2 parents edf155b + 24f177d commit f2c5324
Show file tree
Hide file tree
Showing 14 changed files with 166 additions and 171 deletions.
11 changes: 3 additions & 8 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -5375,14 +5375,9 @@ let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<
def vsubnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">;
}

let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vcomsbf16eq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
def vcomsbf16lt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
def vcomsbf16neq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
def vcomsbf16ge : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
def vcomsbf16gt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
def vcomsbf16le : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
}
let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in
def vcomisbf16#Cmp : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;

let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vcmppbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, _Vector<32, __bf16>, _Constant int, unsigned int)">;
Expand Down
36 changes: 18 additions & 18 deletions clang/lib/Headers/avx10_2bf16intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -523,34 +523,34 @@ _mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
(__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)_mm_setzero_pbh());
}

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comeqsbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomsbf16eq((__v8bf)A, (__v8bf)B);
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomisbf16eq((__v8bf)A, (__v8bf)B);
}

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comltsbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomsbf16lt((__v8bf)A, (__v8bf)B);
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomisbf16lt((__v8bf)A, (__v8bf)B);
}

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comlesbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomsbf16le((__v8bf)A, (__v8bf)B);
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomisbf16le((__v8bf)A, (__v8bf)B);
}

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgtsbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomsbf16gt((__v8bf)A, (__v8bf)B);
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomisbf16gt((__v8bf)A, (__v8bf)B);
}

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgesbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomsbf16ge((__v8bf)A, (__v8bf)B);
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomisbf16ge((__v8bf)A, (__v8bf)B);
}

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comneqsbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomsbf16neq((__v8bf)A, (__v8bf)B);
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sbh(__m128bh A,
__m128bh B) {
return __builtin_ia32_vcomisbf16neq((__v8bf)A, (__v8bf)B);
}

#define _mm256_cmp_pbh_mask(__A, __B, __P) \
Expand Down
48 changes: 24 additions & 24 deletions clang/test/CodeGen/X86/avx10_2bf16-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -648,40 +648,40 @@ __m128bh test_mm_maskz_min_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
return _mm_maskz_min_pbh(__U, __A, __B);
}

int test_mm_comeqsbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comeqsbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comeqsbh(__A, __B);
int test_mm_comieq_sbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comieq_sbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomisbf16eq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comieq_sbh(__A, __B);
}

int test_mm_comltsbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comltsbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comltsbh(__A, __B);
int test_mm_comilt_sbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comilt_sbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomisbf16lt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comilt_sbh(__A, __B);
}

int test_mm_comlesbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comlesbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comlesbh(__A, __B);
int test_mm_comile_sbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comile_sbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomisbf16le(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comile_sbh(__A, __B);
}

int test_mm_comgtsbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comgtsbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comgtsbh(__A, __B);
int test_mm_comigt_sbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comigt_sbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomisbf16gt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comigt_sbh(__A, __B);
}

int test_mm_comgesbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comgesbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comgesbh(__A, __B);
int test_mm_comige_sbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comige_sbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomisbf16ge(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comige_sbh(__A, __B);
}

int test_mm_comneqsbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comneqsbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comneqsbh(__A, __B);
int test_mm_comineq_sbh(__m128bh __A, __m128bh __B) {
// CHECK-LABEL: test_mm_comineq_sbh
// CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomisbf16neq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
return _mm_comineq_sbh(__A, __B);
}

__mmask16 test_mm256_cmp_pbh_mask_eq_oq(__m256bh a, __m256bh b) {
Expand Down
12 changes: 6 additions & 6 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -7642,22 +7642,22 @@ def int_x86_avx10_vminpbf16256 : ClangBuiltin<"__builtin_ia32_vminpbf16256">,
def int_x86_avx10_vminpbf16128 : ClangBuiltin<"__builtin_ia32_vminpbf16128">,
DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
[IntrNoMem]>;
def int_x86_avx10_vcomsbf16eq : ClangBuiltin<"__builtin_ia32_vcomsbf16eq">,
def int_x86_avx10_vcomisbf16eq : ClangBuiltin<"__builtin_ia32_vcomisbf16eq">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
[IntrNoMem]>;
def int_x86_avx10_vcomsbf16lt : ClangBuiltin<"__builtin_ia32_vcomsbf16lt">,
def int_x86_avx10_vcomisbf16lt : ClangBuiltin<"__builtin_ia32_vcomisbf16lt">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty,llvm_v8bf16_ty],
[IntrNoMem]>;
def int_x86_avx10_vcomsbf16le : ClangBuiltin<"__builtin_ia32_vcomsbf16le">,
def int_x86_avx10_vcomisbf16le : ClangBuiltin<"__builtin_ia32_vcomisbf16le">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
[IntrNoMem]>;
def int_x86_avx10_vcomsbf16gt : ClangBuiltin<"__builtin_ia32_vcomsbf16gt">,
def int_x86_avx10_vcomisbf16gt : ClangBuiltin<"__builtin_ia32_vcomisbf16gt">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
[IntrNoMem]>;
def int_x86_avx10_vcomsbf16ge : ClangBuiltin<"__builtin_ia32_vcomsbf16ge">,
def int_x86_avx10_vcomisbf16ge : ClangBuiltin<"__builtin_ia32_vcomisbf16ge">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
[IntrNoMem]>;
def int_x86_avx10_vcomsbf16neq : ClangBuiltin<"__builtin_ia32_vcomsbf16neq">,
def int_x86_avx10_vcomisbf16neq : ClangBuiltin<"__builtin_ia32_vcomisbf16neq">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
[IntrNoMem]>;
def int_x86_avx10_mask_rsqrt_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16128_mask">,
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/X86/X86InstrAVX10.td
Original file line number Diff line number Diff line change
Expand Up @@ -1359,19 +1359,19 @@ defm VMINPBF16 : avx10_fp_binopne_int_pbf16<0x5D, "vmin", SchedWriteFCmpSizes, 0
defm VMAXPBF16 : avx10_fp_binopne_int_pbf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
}

// VCOMSBF16
// VCOMISBF16
let Uses = []<Register>, mayRaiseFPException = 0,
Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
//TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *`
//which may require extend supports on BFR16X, loadbf16, ...
defm VCOMSBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
"comsbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
VEX_LIG, EVEX_CD8<16, CD8VT1>;
defm VCOMISBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
"comisbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
VEX_LIG, EVEX_CD8<16, CD8VT1>;

let isCodeGenOnly = 1 in {
defm VCOMSBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
sse_load_bf16, "comsbf16", SSEPackedSingle>,
T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
defm VCOMISBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
sse_load_bf16, "comisbf16", SSEPackedSingle>,
T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
}

Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/X86/X86IntrinsicsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -751,12 +751,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx10_vaddps256, INTR_TYPE_2OP, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx10_vcomsbf16eq, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(avx10_vcomsbf16ge, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(avx10_vcomsbf16gt, COMI, X86ISD::COMI, ISD::SETGT),
X86_INTRINSIC_DATA(avx10_vcomsbf16le, COMI, X86ISD::COMI, ISD::SETLE),
X86_INTRINSIC_DATA(avx10_vcomsbf16lt, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(avx10_vcomsbf16neq, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(avx10_vcomisbf16eq, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(avx10_vcomisbf16ge, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(avx10_vcomisbf16gt, COMI, X86ISD::COMI, ISD::SETGT),
X86_INTRINSIC_DATA(avx10_vcomisbf16le, COMI, X86ISD::COMI, ISD::SETLE),
X86_INTRINSIC_DATA(avx10_vcomisbf16lt, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(avx10_vcomisbf16neq, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(avx10_vcvt2ph2bf8128, INTR_TYPE_2OP, X86ISD::VCVT2PH2BF8,
0),
X86_INTRINSIC_DATA(avx10_vcvt2ph2bf8256, INTR_TYPE_2OP, X86ISD::VCVT2PH2BF8,
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -118,69 +118,69 @@ define <8 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_128(<8 x bfloat> %x1,
ret <8 x bfloat> %res1
}

declare i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomisbf16eq(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomisbf16lt(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomisbf16le(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomisbf16gt(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomisbf16ge(<8 x bfloat>, <8 x bfloat>)
declare i32 @llvm.x86.avx10.vcomisbf16neq(<8 x bfloat>, <8 x bfloat>)

define i32 @test_x86_avx10_com_nesbf16_eq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_eq:
; CHECK: # %bb.0:
; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: vcomisbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; CHECK-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; CHECK-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1)
%res = call i32 @llvm.x86.avx10.vcomisbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1)
ret i32 %res
}

define i32 @test_x86_avx10_com_nesbf16_lt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_lt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
; CHECK-NEXT: vcomisbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1)
%res = call i32 @llvm.x86.avx10.vcomisbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1)
ret i32 %res
}

define i32 @test_x86_avx10_com_nesbf16_le(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_le:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
; CHECK-NEXT: vcomisbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1)
%res = call i32 @llvm.x86.avx10.vcomisbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1)
ret i32 %res
}

define i32 @test_x86_avx10_com_nesbf16_gt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_gt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: vcomisbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %a0, <8 x bfloat> %a1)
%res = call i32 @llvm.x86.avx10.vcomisbf16ge(<8 x bfloat> %a0, <8 x bfloat> %a1)
ret i32 %res
}

define i32 @test_x86_avx10_com_nesbf16_neq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
; CHECK-LABEL: test_x86_avx10_com_nesbf16_neq:
; CHECK: # %bb.0:
; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: vcomisbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
; CHECK-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; CHECK-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; CHECK-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1)
%res = call i32 @llvm.x86.avx10.vcomisbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1)
ret i32 %res
}

Expand Down
28 changes: 14 additions & 14 deletions llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,32 +205,32 @@
# INTEL: vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123
0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b

# ATT: vcomsbf16 %xmm3, %xmm2
# INTEL: vcomsbf16 xmm2, xmm3
# ATT: vcomisbf16 %xmm3, %xmm2
# INTEL: vcomisbf16 xmm2, xmm3
0x62,0xf5,0x7d,0x08,0x2f,0xd3

# ATT: vcomsbf16 268435456(%esp,%esi,8), %xmm2
# INTEL: vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456]
# ATT: vcomisbf16 268435456(%esp,%esi,8), %xmm2
# INTEL: vcomisbf16 xmm2, word ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vcomsbf16 291(%edi,%eax,4), %xmm2
# INTEL: vcomsbf16 xmm2, word ptr [edi + 4*eax + 291]
# ATT: vcomisbf16 291(%edi,%eax,4), %xmm2
# INTEL: vcomisbf16 xmm2, word ptr [edi + 4*eax + 291]
0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vcomsbf16 (%eax), %xmm2
# INTEL: vcomsbf16 xmm2, word ptr [eax]
# ATT: vcomisbf16 (%eax), %xmm2
# INTEL: vcomisbf16 xmm2, word ptr [eax]
0x62,0xf5,0x7d,0x08,0x2f,0x10

# ATT: vcomsbf16 -64(,%ebp,2), %xmm2
# INTEL: vcomsbf16 xmm2, word ptr [2*ebp - 64]
# ATT: vcomisbf16 -64(,%ebp,2), %xmm2
# INTEL: vcomisbf16 xmm2, word ptr [2*ebp - 64]
0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff

# ATT: vcomsbf16 254(%ecx), %xmm2
# INTEL: vcomsbf16 xmm2, word ptr [ecx + 254]
# ATT: vcomisbf16 254(%ecx), %xmm2
# INTEL: vcomisbf16 xmm2, word ptr [ecx + 254]
0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f

# ATT: vcomsbf16 -256(%edx), %xmm2
# INTEL: vcomsbf16 xmm2, word ptr [edx - 256]
# ATT: vcomisbf16 -256(%edx), %xmm2
# INTEL: vcomisbf16 xmm2, word ptr [edx - 256]
0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80

# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,32 +205,32 @@
# INTEL: vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123
0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b

# ATT: vcomsbf16 %xmm23, %xmm22
# INTEL: vcomsbf16 xmm22, xmm23
# ATT: vcomisbf16 %xmm23, %xmm22
# INTEL: vcomisbf16 xmm22, xmm23
0x62,0xa5,0x7d,0x08,0x2f,0xf7

# ATT: vcomsbf16 268435456(%rbp,%r14,8), %xmm22
# INTEL: vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
# ATT: vcomisbf16 268435456(%rbp,%r14,8), %xmm22
# INTEL: vcomisbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT: vcomsbf16 291(%r8,%rax,4), %xmm22
# INTEL: vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291]
# ATT: vcomisbf16 291(%r8,%rax,4), %xmm22
# INTEL: vcomisbf16 xmm22, word ptr [r8 + 4*rax + 291]
0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00

# ATT: vcomsbf16 (%rip), %xmm22
# INTEL: vcomsbf16 xmm22, word ptr [rip]
# ATT: vcomisbf16 (%rip), %xmm22
# INTEL: vcomisbf16 xmm22, word ptr [rip]
0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00

# ATT: vcomsbf16 -64(,%rbp,2), %xmm22
# INTEL: vcomsbf16 xmm22, word ptr [2*rbp - 64]
# ATT: vcomisbf16 -64(,%rbp,2), %xmm22
# INTEL: vcomisbf16 xmm22, word ptr [2*rbp - 64]
0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff

# ATT: vcomsbf16 254(%rcx), %xmm22
# INTEL: vcomsbf16 xmm22, word ptr [rcx + 254]
# ATT: vcomisbf16 254(%rcx), %xmm22
# INTEL: vcomisbf16 xmm22, word ptr [rcx + 254]
0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f

# ATT: vcomsbf16 -256(%rdx), %xmm22
# INTEL: vcomsbf16 xmm22, word ptr [rdx - 256]
# ATT: vcomisbf16 -256(%rdx), %xmm22
# INTEL: vcomisbf16 xmm22, word ptr [rdx - 256]
0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80

# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22
Expand Down
Loading

0 comments on commit f2c5324

Please sign in to comment.