diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index a86ec240b9193..f97145db0daaf 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -998,13 +998,13 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
 
 // vextractps - extract 32 bits from XMM
-def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
+def VEXTRACTPSZrri : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins VR128X:$src1, u8imm:$src2),
                       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                       EVEX, WIG, Sched<[WriteVecExtract]>;
 
-def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
+def VEXTRACTPSZmri : AVX512AIi8<0x17, MRMDestMem, (outs),
                       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 18d7e54ac5461..d4b49051012c6 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5333,19 +5333,19 @@ defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;
 /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
 /// destination
 multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
-  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
-                   (ins VR128:$src1, u8imm:$src2),
-                   !strconcat(OpcodeStr,
-                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   [(set GR32orGR64:$dst,
-                      (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
-                   Sched<[WriteVecExtract]>;
-  def mr : SS4AIi8<opc, MRMDestMem, (outs),
-                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
-                   !strconcat(OpcodeStr,
-                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
-                            addr:$dst)]>, Sched<[WriteVecExtractSt]>;
+  def rri : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
+                    (ins VR128:$src1, u8imm:$src2),
+                    !strconcat(OpcodeStr,
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    [(set GR32orGR64:$dst,
+                       (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+                    Sched<[WriteVecExtract]>;
+  def mri : SS4AIi8<opc, MRMDestMem, (outs),
+                    (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
+                    !strconcat(OpcodeStr,
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+                             addr:$dst)]>, Sched<[WriteVecExtractSt]>;
 }
 
 let ExeDomain = SSEPackedSingle in {
diff --git a/llvm/lib/Target/X86/X86ReplaceableInstrs.def b/llvm/lib/Target/X86/X86ReplaceableInstrs.def
index 413c2677041df..e9107af2acac7 100644
--- a/llvm/lib/Target/X86/X86ReplaceableInstrs.def
+++ b/llvm/lib/Target/X86/X86ReplaceableInstrs.def
@@ -42,8 +42,8 @@ ENTRY(UNPCKLPSrm, UNPCKLPSrm, PUNPCKLDQrm)
 ENTRY(UNPCKLPSrr, UNPCKLPSrr, PUNPCKLDQrr)
 ENTRY(UNPCKHPSrm, UNPCKHPSrm, PUNPCKHDQrm)
 ENTRY(UNPCKHPSrr, UNPCKHPSrr, PUNPCKHDQrr)
-ENTRY(EXTRACTPSmr, EXTRACTPSmr, PEXTRDmr)
-ENTRY(EXTRACTPSrr, EXTRACTPSrr, PEXTRDrr)
+ENTRY(EXTRACTPSmri, EXTRACTPSmri, PEXTRDmr)
+ENTRY(EXTRACTPSrri, EXTRACTPSrri, PEXTRDrr)
 // AVX 128-bit support
 ENTRY(VMOVAPSmr, VMOVAPDmr, VMOVDQAmr)
 ENTRY(VMOVAPSrm, VMOVAPDrm, VMOVDQArm)
@@ -74,8 +74,8 @@ ENTRY(VUNPCKLPSrm, VUNPCKLPSrm, VPUNPCKLDQrm)
 ENTRY(VUNPCKLPSrr, VUNPCKLPSrr, VPUNPCKLDQrr)
 ENTRY(VUNPCKHPSrm, VUNPCKHPSrm, VPUNPCKHDQrm)
 ENTRY(VUNPCKHPSrr, VUNPCKHPSrr, VPUNPCKHDQrr)
-ENTRY(VEXTRACTPSmr, VEXTRACTPSmr, VPEXTRDmr)
-ENTRY(VEXTRACTPSrr, VEXTRACTPSrr, VPEXTRDrr)
+ENTRY(VEXTRACTPSmri, VEXTRACTPSmri, VPEXTRDmr)
+ENTRY(VEXTRACTPSrri, VEXTRACTPSrri, VPEXTRDrr)
 // AVX 256-bit support
 ENTRY(VMOVAPSYmr, VMOVAPDYmr, VMOVDQAYmr)
 ENTRY(VMOVAPSYrm, VMOVAPDYrm, VMOVDQAYrm)
@@ -178,8 +178,8 @@ ENTRY(VUNPCKLPSZrm, VUNPCKLPSZrm, VPUNPCKLDQZrm)
 ENTRY(VUNPCKLPSZrr, VUNPCKLPSZrr, VPUNPCKLDQZrr)
 ENTRY(VUNPCKHPSZrm, VUNPCKHPSZrm, VPUNPCKHDQZrm)
 ENTRY(VUNPCKHPSZrr, VUNPCKHPSZrr, VPUNPCKHDQZrr)
-ENTRY(VEXTRACTPSZmr, VEXTRACTPSZmr, VPEXTRDZmr)
-ENTRY(VEXTRACTPSZrr, VEXTRACTPSZrr, VPEXTRDZrr)
+ENTRY(VEXTRACTPSZmri, VEXTRACTPSZmri, VPEXTRDZmr)
+ENTRY(VEXTRACTPSZrri, VEXTRACTPSZrri, VPEXTRDZrr)
 };
 
 static const uint16_t ReplaceableInstrsAVX2[][3] = {
diff --git a/llvm/lib/Target/X86/X86SchedAlderlakeP.td b/llvm/lib/Target/X86/X86SchedAlderlakeP.td
index 6917ff50d13f2..e0a5a4f5b49e5 100644
--- a/llvm/lib/Target/X86/X86SchedAlderlakeP.td
+++ b/llvm/lib/Target/X86/X86SchedAlderlakeP.td
@@ -886,14 +886,14 @@ def ADLPWriteResGroup50 : SchedWriteRes<[ADLPPort04_09, ADLPPort05, ADLPPort07_0
   let Latency = 12;
   let NumMicroOps = 3;
 }
-def : InstRW<[ADLPWriteResGroup50], (instregex "^(V?)EXTRACTPSmr$")>;
+def : InstRW<[ADLPWriteResGroup50], (instregex "^(V?)EXTRACTPSmri$")>;
 def : InstRW<[ADLPWriteResGroup50], (instrs SMSW16m)>;
 
 def ADLPWriteResGroup51 : SchedWriteRes<[ADLPPort00, ADLPPort05]> {
   let Latency = 4;
   let NumMicroOps = 2;
 }
-def : InstRW<[ADLPWriteResGroup51], (instregex "^(V?)EXTRACTPSrr$")>;
+def : InstRW<[ADLPWriteResGroup51], (instregex "^(V?)EXTRACTPSrri$")>;
 def : InstRW<[ADLPWriteResGroup51], (instrs MMX_PEXTRWrr)>;
 
 def ADLPWriteResGroup52 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort02_03, ADLPPort02_03_07, ADLPPort04, ADLPPort06]> {
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 8c0fb11afe336..b32db53ff08cd 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -884,7 +884,7 @@ def ICXWriteResGroup36 : SchedWriteRes<[ICXPort0,ICXPort5]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[ICXWriteResGroup36], (instregex "(V?)EXTRACTPS(Z?)rr")>;
+def: InstRW<[ICXWriteResGroup36], (instregex "(V?)EXTRACTPS(Z?)rri")>;
 
 def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
   let Latency = 3;
@@ -1034,7 +1034,7 @@ def ICXWriteResGroup53 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
   let NumMicroOps = 3;
   let ReleaseAtCycles = [1,1,1];
 }
-def: InstRW<[ICXWriteResGroup53], (instregex "(V?)EXTRACTPS(Z?)mr")>;
+def: InstRW<[ICXWriteResGroup53], (instregex "(V?)EXTRACTPS(Z?)mri")>;
 
 def ICXWriteResGroup54 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
   let Latency = 4;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index fc42d5e52451e..7be9f51bcd46b 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -686,7 +686,7 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
+def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrri")>;
 
 def SBWriteResGroup23 : SchedWriteRes<[SBPort05,SBPort015]> {
   let Latency = 2;
@@ -789,7 +789,7 @@ def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
 }
 def: InstRW<[SBWriteResGroup36], (instrs CALL64pcrel32)>;
 def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r",
-                                            "(V?)EXTRACTPSmr")>;
+                                            "(V?)EXTRACTPSmri")>;
 
 def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
   let Latency = 5;
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 413d5bbaadd4d..ae282b8c287f8 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -1004,18 +1004,18 @@ def SPRWriteResGroup54 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]>
   let Latency = 12;
   let NumMicroOps = 3;
 }
-def : InstRW<[SPRWriteResGroup54], (instregex "^(V?)EXTRACTPSmr$",
+def : InstRW<[SPRWriteResGroup54], (instregex "^(V?)EXTRACTPSmri$",
                                     "^VPMOVQDZ((256)?)mr$")>;
 def : InstRW<[SPRWriteResGroup54], (instrs SMSW16m,
-                                    VEXTRACTPSZmr)>;
+                                    VEXTRACTPSZmri)>;
 
 def SPRWriteResGroup55 : SchedWriteRes<[SPRPort00, SPRPort05]> {
   let Latency = 4;
   let NumMicroOps = 2;
 }
-def : InstRW<[SPRWriteResGroup55], (instregex "^(V?)EXTRACTPSrr$")>;
+def : InstRW<[SPRWriteResGroup55], (instregex "^(V?)EXTRACTPSrri$")>;
 def : InstRW<[SPRWriteResGroup55], (instrs MMX_PEXTRWrr,
-                                    VEXTRACTPSZrr,
+                                    VEXTRACTPSZrri,
                                     VPERMWZrr)>;
 
 def SPRWriteResGroup56 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort06]> {
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index f66b7172a7ffc..b1a7b2fb2c6cb 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -1017,7 +1017,7 @@ def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1, 2];
 }
-def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
+def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrri")>;
 
 def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
   let Latency = 5;
@@ -1025,7 +1025,7 @@ def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [5, 1, 2];
 }
 // m32,x,i.
-def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
+def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmri")>;
 
 // VEXTRACTF128 / VEXTRACTI128.
 // x,y,i.
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 14e18cb9dc717..8ac095bd1507b 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -1023,7 +1023,7 @@ def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1, 2];
 }
-def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
+def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrri")>;
 
 def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
   let Latency = 5;
@@ -1031,7 +1031,7 @@ def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
   let ReleaseAtCycles = [5, 1, 2];
 }
 // m32,x,i.
-def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
+def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmri")>;
 
 // VEXTRACTF128 / VEXTRACTI128.
 // x,y,i.
diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
index 1d2cda259d8e5..f89e958bc9ad6 100644
--- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
+++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
@@ -2300,10 +2300,10 @@ body: |
   VUCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr
   ; CHECK: VUCOMISSrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr
   VUCOMISSZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr
-  ; CHECK: VEXTRACTPSmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 1
-  VEXTRACTPSZmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 1
-  ; CHECK: $eax = VEXTRACTPSrr $xmm0, 1
-  $eax = VEXTRACTPSZrr $xmm0, 1
+  ; CHECK: VEXTRACTPSmri $rdi, 1, $noreg, 0, $noreg, $xmm0, 1
+  VEXTRACTPSZmri $rdi, 1, $noreg, 0, $noreg, $xmm0, 1
+  ; CHECK: $eax = VEXTRACTPSrri $xmm0, 1
+  $eax = VEXTRACTPSZrri $xmm0, 1
   ; CHECK: $xmm0 = VINSERTPSrmi $xmm0, $rdi, 1, $noreg, 0, $noreg, 1
   $xmm0 = VINSERTPSZrmi $xmm0, $rdi, 1, $noreg, 0, $noreg, 1
   ; CHECK: $xmm0 = VINSERTPSrri $xmm0, $xmm0, 1
@@ -4068,10 +4068,10 @@ body: |
   $xmm16 = VPALIGNRZ128rmi $xmm16, $rdi, 1, $noreg, 0, $noreg, 15
   ; CHECK: $xmm16 = VPALIGNRZ128rri $xmm16, $xmm1, 15
   $xmm16 = VPALIGNRZ128rri $xmm16, $xmm1, 15
-  ; CHECK: VEXTRACTPSZmr $rdi, 1, $noreg, 0, $noreg, $xmm16, 1
-  VEXTRACTPSZmr $rdi, 1, $noreg, 0, $noreg, $xmm16, 1
-  ; CHECK: $eax = VEXTRACTPSZrr $xmm16, 1
-  $eax = VEXTRACTPSZrr $xmm16, 1
+  ; CHECK: VEXTRACTPSZmri $rdi, 1, $noreg, 0, $noreg, $xmm16, 1
+  VEXTRACTPSZmri $rdi, 1, $noreg, 0, $noreg, $xmm16, 1
+  ; CHECK: $eax = VEXTRACTPSZrri $xmm16, 1
+  $eax = VEXTRACTPSZrri $xmm16, 1
   ; CHECK: $xmm16 = VINSERTPSZrmi $xmm16, $rdi, 1, $noreg, 0, $noreg, 1
   $xmm16 = VINSERTPSZrmi $xmm16, $rdi, 1, $noreg, 0, $noreg, 1
   ; CHECK: $xmm16 = VINSERTPSZrri $xmm16, $xmm16, 1
diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
index d219ee9652b6e..73813c0106e09 100644
--- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll
+++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
@@ -76,7 +76,7 @@ define void @one_mask_bit_set2(ptr %addr, <4 x float> %val) {
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; CHECK-NEXT: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store (s32) into %ir.addr + 8)
+  ; CHECK-NEXT: VEXTRACTPSmri [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store (s32) into %ir.addr + 8)
   ; CHECK-NEXT: RET 0
   call void @llvm.masked.store.v4f32.p0(<4 x float> %val, ptr %addr, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
   ret void
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index c731de84759a9..65e6ac59e0550 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -402,7 +402,7 @@ static const X86FoldTableEntry Table0[] = {
   {X86::DIV64r_NF, X86::DIV64m_NF, TB_FOLDED_LOAD},
   {X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD},
   {X86::DIV8r_NF, X86::DIV8m_NF, TB_FOLDED_LOAD},
-  {X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE},
+  {X86::EXTRACTPSrri, X86::EXTRACTPSmri, TB_FOLDED_STORE},
   {X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD},
   {X86::IDIV16r_NF, X86::IDIV16m_NF, TB_FOLDED_LOAD},
   {X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD},
@@ -500,8 +500,8 @@ static const X86FoldTableEntry Table0[] = {
   {X86::VEXTRACTI64x2Z256rri, X86::VEXTRACTI64x2Z256mri, TB_FOLDED_STORE},
   {X86::VEXTRACTI64x2Zrri, X86::VEXTRACTI64x2Zmri, TB_FOLDED_STORE},
   {X86::VEXTRACTI64x4Zrri, X86::VEXTRACTI64x4Zmri, TB_FOLDED_STORE},
-  {X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE},
-  {X86::VEXTRACTPSrr, X86::VEXTRACTPSmr, TB_FOLDED_STORE},
+  {X86::VEXTRACTPSZrri, X86::VEXTRACTPSZmri, TB_FOLDED_STORE},
+  {X86::VEXTRACTPSrri, X86::VEXTRACTPSmri, TB_FOLDED_STORE},
   {X86::VMOV64toSDZrr, X86::MOV64mr, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::VMOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE|TB_NO_REVERSE},
   {X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_32},