From a2e1fd5ffab4fe64a160571a9dffaacc3f22c32d Mon Sep 17 00:00:00 2001 From: Djordje Todorovic Date: Thu, 26 Dec 2024 09:09:24 +0100 Subject: [PATCH] [RISCV] Add MIPS extensions Adding two extensions for MIPS p8700 CPU: 1. cmove (conditional move) 2. lsp (load/store pair) The official product page here: https://mips.com/products/hardware/p8700 --- clang/include/clang/Driver/Options.td | 4 + clang/lib/Driver/ToolChains/Clang.cpp | 15 + llvm/docs/RISCVUsage.rst | 6 + .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 10 + llvm/lib/Target/RISCV/CMakeLists.txt | 1 + .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1 + llvm/lib/Target/RISCV/RISCV.h | 2 + llvm/lib/Target/RISCV/RISCVFeatures.td | 13 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +- llvm/lib/Target/RISCV/RISCVInstrFormats.td | 72 +++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 43 ++ llvm/lib/Target/RISCV/RISCVInstrInfo.h | 6 + llvm/lib/Target/RISCV/RISCVInstrInfo.td | 145 +++++ llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 125 ----- llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td | 82 +++ .../Target/RISCV/RISCVLoadStoreOptimizer.cpp | 370 +++++++++++++ llvm/lib/Target/RISCV/RISCVProcessors.td | 4 +- llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 18 + llvm/lib/Target/RISCV/RISCVSubtarget.h | 2 + llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 15 + llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 2 + llvm/test/CodeGen/RISCV/load-store-pair.ll | 509 ++++++++++++++++++ llvm/test/CodeGen/RISCV/select-and.ll | 25 + llvm/test/CodeGen/RISCV/select-bare.ll | 14 + llvm/test/CodeGen/RISCV/select-cc.ll | 86 +++ llvm/test/CodeGen/RISCV/select-or.ll | 25 + 27 files changed, 1473 insertions(+), 127 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td create mode 100644 llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp create mode 100644 llvm/test/CodeGen/RISCV/load-store-pair.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d922709db177865..28b7d70f77d4a2c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4963,6 +4963,10 @@ def msave_restore : Flag<["-"], "msave-restore">, Group, def mno_save_restore : Flag<["-"], "mno-save-restore">, Group, HelpText<"Disable using library calls for save and restore">; } // let Flags = [TargetSpecific] +def mload_store_pairs : Flag<["-"], "mload-store-pairs">, Group; +def mno_load_store_pairs : Flag<["-"], "mno-load-store-pairs">, Group; +def mccmov : Flag<["-"], "mccmov">, Group; +def mno_ccmov : Flag<["-"], "mno-ccmov">, Group; let Flags = [TargetSpecific] in { def menable_experimental_extensions : Flag<["-"], "menable-experimental-extensions">, Group, HelpText<"Enable use of experimental RISC-V extensions.">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a020e00cd17392d..e91df8838032cdf 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2167,6 +2167,21 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args, CmdArgs.push_back(A->getValue()); } + if (Arg *A = Args.getLastArg(options::OPT_mload_store_pairs, + options::OPT_mno_load_store_pairs)) { + if (A->getOption().matches(options::OPT_mload_store_pairs)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-riscv-load-store-pairs=1"); + } + } + + if (Arg *A = Args.getLastArg(options::OPT_mccmov, + options::OPT_mno_ccmov)) { + if (A->getOption().matches(options::OPT_mno_ccmov)) { + CmdArgs.push_back("-mllvm"); + 
CmdArgs.push_back("-riscv-ccmov=0"); + } + } // Handle -mrvv-vector-bits= if (Arg *A = Args.getLastArg(options::OPT_mrvv_vector_bits_EQ)) { StringRef Val = A->getValue(); diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 22600f5720553e8..06b32a69cef9ea4 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -444,6 +444,12 @@ The current vendor extensions supported are: ``experimental-Xqcisls`` LLVM implements `version 0.2 of the Qualcomm uC Scaled Load Store extension specification `__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32. +``Xmipscmove`` + LLVM implements conditional move for the `p8700 processor ` by MIPS. + +``Xmipslsp`` + LLVM implements load/store pair instructions for the `p8700 processor ` by MIPS. + Experimental C Intrinsics ========================= diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 4c1fd5aa41e2b70..76eb5254a19a348 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -876,6 +876,16 @@ struct RISCVOperand final : public MCParsedAsmOperand { VK == RISCVMCExpr::VK_RISCV_None; } + bool isUImm7Lsb000() const { + if (!isImm()) + return false; + int64_t Imm; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && isShiftedUInt<4, 3>(Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + bool isUImm8Lsb00() const { if (!isImm()) return false; diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 44661647a863103..cc9bf5727cbdf5e 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_target(RISCVCodeGen RISCVISelLowering.cpp RISCVLandingPadSetup.cpp RISCVMachineFunctionInfo.cpp + RISCVLoadStoreOptimizer.cpp RISCVMergeBaseOffset.cpp RISCVOptWInstrs.cpp RISCVPostRAExpandPseudoInsts.cpp diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 7fb5fc7a831308a..f51a9205cbd4600 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -308,6 +308,7 @@ enum OperandType : unsigned { OPERAND_UIMM6_LSB0, OPERAND_UIMM7, OPERAND_UIMM7_LSB00, + OPERAND_UIMM7_LSB000, OPERAND_UIMM8_LSB00, OPERAND_UIMM8, OPERAND_UIMM8_LSB000, diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index d7bab601d545ccb..b1aee98739e8521 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -84,6 +84,8 @@ void initializeRISCVMoveMergePass(PassRegistry &); FunctionPass *createRISCVPushPopOptimizationPass(); void initializeRISCVPushPopOptPass(PassRegistry &); +FunctionPass *createRISCVLoadStoreOptPass(); +void initializeRISCVLoadStoreOptPass(PassRegistry &); FunctionPass *createRISCVZacasABIFixPass(); void initializeRISCVZacasABIFixPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 916b140c5bde758..9ba2c92cc90d1af 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1395,6 +1395,19 @@ def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion() def TuneMIPSP8700 : SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700", "MIPS 
p8700 processor">; +def FeatureVendorMIPSCMove : SubtargetFeature<"xmipscmove", "HasVendorMIPSCMove", + "true", "Using CCMov", + [Feature64Bit]>; +def HasVendorMIPSCMove + : Predicate<"Subtarget->useCCMovInsn()">, + AssemblerPredicate<(all_of FeatureVendorMIPSCMove), "'ccmov' instruction">; +def FeatureVendorMIPSLoadStorePairs + : SubtargetFeature<"xmipslsp", "HasMIPSLSP", "true", + "Optimize for hardware load-store bonding">; +def HasVendorMIPSLoadStorePairs + : Predicate<"Subtarget->useLoadStorePairs()">, + AssemblerPredicate<(all_of FeatureVendorMIPSLoadStorePairs), + "load and store pair instructions">; def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", "SiFive 7-Series processors">; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index cda64ae5f498d36..88dd9f0ec4f18c6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -409,7 +409,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ABS, MVT::i32, Custom); } - if (!Subtarget.hasVendorXTHeadCondMov()) + if (Subtarget.hasVendorMIPSCMove()) + setOperationAction(ISD::SELECT, XLenVT, Legal); + else if (!Subtarget.hasVendorXTHeadCondMov()) setOperationAction(ISD::SELECT, XLenVT, Custom); static const unsigned FPLegalNodeTypes[] = { diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 013c26c72bfd554..9ffed2c80ad6d3c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -514,6 +514,78 @@ class RVInstJ + : RVInst { + bits<7> imm7; + bits<5> rs1; + bits<5> rd1; + bits<5> rd2; + + let Inst{31-27} = rd2; + let Inst{26-23} = imm7{6-3}; + let Inst{22-20} = 0b000; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b100; + let Inst{11-7} = rd1; + let Inst{6-0} = 0b0001011; +} + +// Load word pair format. +class LWPFormat + : RVInst { + bits<7> imm7; + bits<5> rs1; + bits<5> rd1; + bits<5> rd2; + + let Inst{31-27} = rd2; + let Inst{26-22} = imm7{6-2}; + let Inst{21-20} = 0b01; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b100; + let Inst{11-7} = rd1; + let Inst{6-0} = 0b0001011; +} + +// Store double pair format. +class SDPFormat + : RVInst { + bits<7> imm7; + bits<5> rs3; + bits<5> rs2; + bits<5> rs1; + + let Inst{31-27} = rs3; + let Inst{26-25} = imm7{6-5}; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b101; + let Inst{11-10} = imm7{4-3}; + let Inst{9-0} = 0b0000001011; +} + +// Store word pair format. 
+class SWPFormat + : RVInst { + bits<7> imm7; + bits<5> rs3; + bits<5> rs2; + bits<5> rs1; + + let Inst{31-27} = rs3; + let Inst{26-25} = imm7{6-5}; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b101; + let Inst{11-9} = imm7{4-2}; + let Inst{8-0} = 0b010001011; +} + //===----------------------------------------------------------------------===// // Instruction classes for .insn directives //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index f24940795e433f9..c92c8e8077c7a3a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2488,6 +2488,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_UIMM7_LSB00: Ok = isShiftedUInt<5, 2>(Imm); break; + case RISCVOp::OPERAND_UIMM7_LSB000: + Ok = isShiftedUInt<4, 3>(Imm); + break; case RISCVOp::OPERAND_UIMM8_LSB00: Ok = isShiftedUInt<6, 2>(Imm); break; @@ -2734,6 +2737,46 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, .setMIFlags(MemI.getFlags()); } +bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) { + switch (Opc) { + default: + return false; + case RISCV::SH: + case RISCV::LH: + case RISCV::LHU: + case RISCV::SW: + case RISCV::FSW: + case RISCV::LW: + case RISCV::FLW: + case RISCV::SD: + case RISCV::FSD: + case RISCV::LD: + case RISCV::FLD: + return true; + } +} + +bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt, + const TargetRegisterInfo *TRI) { + // If this is a volatile load/store, don't mess with it. + if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3) + return false; + + if (LdSt.getOperand(1).isFI()) + return true; + + assert(LdSt.getOperand(1).isReg() && "Expected a reg operand."); + // Can't cluster if the instruction modifies the base register + // or it is update form. e.g. ld x5,8(x5) + if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI)) + return false; + + if (!LdSt.getOperand(2).isImm()) + return false; + + return true; +} + bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 7e8bcd451a8ef8a..ef81c2d4397f265 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -301,6 +301,12 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { std::unique_ptr analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; + /// Return true if pairing the given load or store may be paired with another. + static bool isPairableLdStInstOpc(unsigned Opc); + + static bool isLdStSafeToPair(const MachineInstr &LdSt, + const TargetRegisterInfo *TRI); + protected: const RISCVSubtarget &STI; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 1260f99ad9dcd03..3e29e77aa3db6f7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -257,6 +257,146 @@ def simm12 : RISCVSImmLeafOp<12> { }]; } +// A 7-bit unsigned immediate where the least significant two bits are zero. 
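(Illustrative aside, not part of the patch: the uimm7_lsb00 and uimm7_lsb000 operands defined just below are what bound the offsets accepted by the new lwp/ldp/swp/sdp instructions. Assuming the "rd1, rd2, offset(base)" assembly syntax defined later in RISCVInstrInfoXMips.td, that works out to:

    lwp a1, a2, 124(a0)   # accepted: multiple of 4, fits in 7 bits
    ldp a1, a2, 120(a0)   # accepted: multiple of 8, fits in 7 bits
    lwp a1, a2, 2(a0)     # rejected: offset not 4-byte aligned
    ldp a1, a2, 128(a0)   # rejected: offset needs more than 7 bits
)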
+def uimm7_lsb00 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<7, "Lsb00">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<7>"; + let OperandType = "OPERAND_UIMM7_LSB00"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<5, 2>(Imm); + }]; +} + +// A 7-bit unsigned immediate where the least significant three bits are zero. +def uimm7_lsb000 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<7, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<7>"; + let OperandType = "OPERAND_UIMM7_LSB000"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<4, 3>(Imm); + }]; +} + +// A 8-bit unsigned immediate where the least significant two bits are zero. +def uimm8_lsb00 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<8, "Lsb00">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<8>"; + let OperandType = "OPERAND_UIMM8_LSB00"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<6, 2>(Imm); + }]; +} + +// A 8-bit unsigned immediate where the least significant three bits are zero. +def uimm8_lsb000 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<8, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<8>"; + let OperandType = "OPERAND_UIMM8_LSB000"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<5, 3>(Imm); + }]; +} + +// A 9-bit signed immediate where the least significant bit is zero. +def simm9_lsb0 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getImmOpValueAsr1"; + let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; + let MCOperandPredicate = [{ + int64_t Imm; + if (MCOp.evaluateAsConstantImm(Imm)) + return isShiftedInt<8, 1>(Imm); + return MCOp.isBareSymbolRef(); + }]; + let OperandType = "OPERAND_PCREL"; +} + +// A 9-bit unsigned immediate where the least significant three bits are zero. +def uimm9_lsb000 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<9, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<9>"; + let OperandType = "OPERAND_UIMM9_LSB000"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<6, 3>(Imm); + }]; +} + +// A 10-bit unsigned immediate where the least significant two bits are zero +// and the immediate can't be zero. +def uimm10_lsb00nonzero : RISCVOp, + ImmLeaf(Imm) && (Imm != 0);}]> { + let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmNonZeroOperand<10>"; + let OperandType = "OPERAND_UIMM10_LSB00_NONZERO"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<8, 2>(Imm) && (Imm != 0); + }]; +} + +// A 10-bit signed immediate where the least significant four bits are zero. 
+def simm10_lsb0000nonzero : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeSImmNonZeroOperand<10>"; + let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedInt<6, 4>(Imm) && (Imm != 0); + }]; +} + +// A 12-bit signed immediate where the least significant bit is zero. +def simm12_lsb0 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getImmOpValueAsr1"; + let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; + let MCOperandPredicate = [{ + int64_t Imm; + if (MCOp.evaluateAsConstantImm(Imm)) + return isShiftedInt<11, 1>(Imm); + return MCOp.isBareSymbolRef(); + }]; + let OperandType = "OPERAND_PCREL"; +} + // A 12-bit signed immediate which cannot fit in 6-bit signed immediate, // but even negative value fit in 12-bit. def simm12_no6 : ImmLeaf { // Standalone (codegen-only) immleaf patterns. +// A 12-bit signed immediate plus one where the imm range will be -2047~2048. +def simm12_plus1 : ImmLeaf(Imm) && Imm != -2048) || Imm == 2048;}]>; + // A 6-bit constant greater than 32. def uimm6gt32 : ImmLeaf(Imm) && Imm > 32; @@ -2119,6 +2263,7 @@ include "RISCVInstrInfoSFB.td" include "RISCVInstrInfoXCV.td" include "RISCVInstrInfoXwch.td" include "RISCVInstrInfoXqci.td" +include "RISCVInstrInfoXMips.td" //===----------------------------------------------------------------------===// // Global ISel diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index ce994206cd785b0..84ecb95212d3ae9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -94,131 +94,6 @@ def c_lui_imm : RISCVOp, }]; } -// A 7-bit unsigned immediate where the least significant two bits are zero. -def uimm7_lsb00 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<7, "Lsb00">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<7>"; - let OperandType = "OPERAND_UIMM7_LSB00"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<5, 2>(Imm); - }]; -} - -// A 8-bit unsigned immediate where the least significant two bits are zero. -def uimm8_lsb00 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<8, "Lsb00">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<8>"; - let OperandType = "OPERAND_UIMM8_LSB00"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<6, 2>(Imm); - }]; -} - -// A 8-bit unsigned immediate where the least significant three bits are zero. -def uimm8_lsb000 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<8, "Lsb000">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<8>"; - let OperandType = "OPERAND_UIMM8_LSB000"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<5, 3>(Imm); - }]; -} - -// A 9-bit signed immediate where the least significant bit is zero. 
-def simm9_lsb0 : Operand, - ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; - let PrintMethod = "printBranchOperand"; - let EncoderMethod = "getImmOpValueAsr1"; - let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; - let MCOperandPredicate = [{ - int64_t Imm; - if (MCOp.evaluateAsConstantImm(Imm)) - return isShiftedInt<8, 1>(Imm); - return MCOp.isBareSymbolRef(); - }]; - let OperandType = "OPERAND_PCREL"; -} - -// A 9-bit unsigned immediate where the least significant three bits are zero. -def uimm9_lsb000 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<9, "Lsb000">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<9>"; - let OperandType = "OPERAND_UIMM9_LSB000"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<6, 3>(Imm); - }]; -} - -// A 10-bit unsigned immediate where the least significant two bits are zero -// and the immediate can't be zero. -def uimm10_lsb00nonzero : RISCVOp, - ImmLeaf(Imm) && (Imm != 0);}]> { - let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmNonZeroOperand<10>"; - let OperandType = "OPERAND_UIMM10_LSB00_NONZERO"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<8, 2>(Imm) && (Imm != 0); - }]; -} - -// A 10-bit signed immediate where the least significant four bits are zero. -def simm10_lsb0000nonzero : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeSImmNonZeroOperand<10>"; - let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedInt<6, 4>(Imm) && (Imm != 0); - }]; -} - -// A 12-bit signed immediate where the least significant bit is zero. -def simm12_lsb0 : Operand, - ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; - let PrintMethod = "printBranchOperand"; - let EncoderMethod = "getImmOpValueAsr1"; - let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; - let MCOperandPredicate = [{ - int64_t Imm; - if (MCOp.evaluateAsConstantImm(Imm)) - return isShiftedInt<11, 1>(Imm); - return MCOp.isBareSymbolRef(); - }]; - let OperandType = "OPERAND_PCREL"; -} - def InsnCDirectiveOpcode : AsmOperandClass { let Name = "InsnCDirectiveOpcode"; let ParserMethod = "parseInsnCDirectiveOpcode"; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td new file mode 100644 index 000000000000000..3bd3279f498283c --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td @@ -0,0 +1,82 @@ +//===-- RISCVInstrInfoXMips.td -----------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the vendor extensions defined by MIPS. 
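(Illustrative aside, not part of the patch: judging from the select patterns and the tests added below, ccmov rd, rs2, rs1, rs3 selects between two registers under a register condition, roughly rd = (rs2 != 0) ? rs1 : rs3, so an IR select can lower to a branchless sequence such as

    xor   a3, a1, a2        # materialize the condition a1 != a2
    ccmov a0, a3, a4, a5    # a0 = (a1 != a2) ? a4 : a5

while lwp/ldp and swp/sdp load or store two registers from consecutive words or doublewords at a single base register and offset.)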
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +// MIPS extensions +//===----------------------------------------------------------------------===// + +let Predicates = [HasVendorMIPSCMove], hasSideEffects = 0, mayLoad = 0, mayStore = 0, DecoderNamespace = "Xmipscomve" in { +def CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2, GPR:$rs3), + "ccmov", "$rd, $rs2, $rs1, $rs3">, + Sched<[]>; +} + +let Predicates = [HasVendorMIPSCMove] in { +def : Pat<(select (XLenVT (setne (XLenVT GPR:$rs2), (XLenVT 0))), + (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq (XLenVT GPR:$rs2), (XLenVT 0))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))), + (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>; +def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT GPR:$y))), + (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT GPR:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setuge (XLenVT GPR:$x), (XLenVT GPR:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setge (XLenVT GPR:$x), (XLenVT GPR:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setle (XLenVT GPR:$y), (XLenVT GPR:$x))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT GPR:$rs2), (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +} + +let Predicates = [HasVendorMIPSLoadStorePairs], hasSideEffects = 0, DecoderNamespace = "Xmipslsp" in { +def LWP : LWPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb00:$imm7), + "lwp", "$rd1, $rd2, ${imm7}(${rs1})">, + Sched<[WriteLDW, WriteLDW, ReadMemBase]> { +let mayLoad = 1; +let mayStore = 0; +} +def LDP : LDPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb000:$imm7), + "ldp", "$rd1, $rd2, ${imm7}(${rs1})">, + Sched<[WriteLDD, WriteLDD, ReadMemBase]> { +let mayLoad = 1; +let mayStore = 0; +} +def SWP : SWPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb00:$imm7), + "swp", "$rs2, $rs3, ${imm7}(${rs1})">, + Sched<[WriteSTW, ReadStoreData, ReadStoreData, ReadMemBase]> { +let mayLoad = 0; +let mayStore = 1; +} +def SDP : SDPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb000:$imm7), + "sdp", "$rs2, $rs3, ${imm7}(${rs1})">, + Sched<[WriteSTD, ReadStoreData, ReadStoreData, ReadMemBase]> { +let mayLoad = 0; +let mayStore = 1; +} +} diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp new file mode 100644 index 000000000000000..b2575d54fc4a498 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp @@ -0,0 +1,370 @@ +//===----- RISCVLoadStoreOptimizer.cpp 
------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Bundle loads and stores that operate on consecutive memory locations to take +// the advantage of hardware load/store bonding. +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVTargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-load-store-opt" +#define RISCV_LOAD_STORE_OPT_NAME "RISCV Load / Store Optimizer" +namespace { + +struct RISCVLoadStoreOpt : public MachineFunctionPass { + static char ID; + bool runOnMachineFunction(MachineFunction &Fn) override; + + RISCVLoadStoreOpt() : MachineFunctionPass(ID) {} + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; } + + // Find and pair load/store instructions. + bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); + + // Convert load/store pairs to single instructions. + bool tryConvertToLdStPair(MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second); + + // Scan the instructions looking for a load/store that can be combined + // with the current instruction into a load/store pair. + // Return the matching instruction if one is found, else MBB->end(). + MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, + bool &MergeForward); + + MachineBasicBlock::iterator + mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, bool MergeForward); + +private: + AliasAnalysis *AA; + MachineRegisterInfo *MRI; + const RISCVInstrInfo *TII; + const RISCVRegisterInfo *TRI; + LiveRegUnits ModifiedRegUnits, UsedRegUnits; + bool UseLoadStorePair = false; +}; +} // end anonymous namespace + +char RISCVLoadStoreOpt::ID = 0; +INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, + false) + +bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(Fn.getFunction())) + return false; + const RISCVSubtarget &Subtarget = Fn.getSubtarget(); + + if (!Subtarget.useLoadStorePairs()) + return false; + + bool MadeChange = false; + TII = Subtarget.getInstrInfo(); + TRI = Subtarget.getRegisterInfo(); + MRI = &Fn.getRegInfo(); + AA = &getAnalysis().getAAResults(); + ModifiedRegUnits.init(*TRI); + UsedRegUnits.init(*TRI); + UseLoadStorePair = Subtarget.useLoadStorePairs(); + + for (MachineBasicBlock &MBB : Fn) { + LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) && + tryToPairLdStInst(MBBI)) + MadeChange = true; + else + ++MBBI; + } + } + return MadeChange; +} + +// Find loads and stores that can be merged into a single load or store pair +// instruction. 
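(Illustrative aside, not part of the patch: the transformation this pass aims for is, roughly,

    ld a1, 0(a0)
    ld a2, 8(a0)

becoming

    ldp a1, a2, 0(a0)

when the two accesses are adjacent, share a base register, and pairing is safe; when a single paired instruction cannot be emitted, the two instructions are instead kept together in a bundle so the bonding opportunity is preserved.)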
+bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + MachineBasicBlock::iterator E = MI.getParent()->end(); + + if (!TII->isLdStSafeToPair(MI, TRI)) + return false; + + // Look ahead for a pairable instruction. + bool MergeForward; + MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward); + if (Paired != E) { + MBBI = mergePairedInsns(MBBI, Paired, MergeForward); + return true; + } + return false; +} + +bool RISCVLoadStoreOpt::tryConvertToLdStPair( + MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { + if (!UseLoadStorePair) + return false; + + unsigned PairOpc; + // TODO: Handle the rest from RISCVInstrInfo::isPairableLdStInstOpc. + switch (First->getOpcode()) { + default: + return false; + case RISCV::SW: + PairOpc = RISCV::SWP; + break; + case RISCV::LW: + PairOpc = RISCV::LWP; + break; + case RISCV::SD: + PairOpc = RISCV::SDP; + break; + case RISCV::LD: + PairOpc = RISCV::LDP; + break; + } + + MachineFunction *MF = First->getMF(); + const MachineMemOperand *MMO = *First->memoperands_begin(); + Align MMOAlign = MMO->getAlign(); + if (const PseudoSourceValue *Source = MMO->getPseudoValue()) + if (Source->kind() == PseudoSourceValue::FixedStack) + MMOAlign = MF->getSubtarget().getFrameLowering()->getStackAlign(); + + if (MMOAlign < Align(MMO->getSize().getValue() * 2)) + return false; + int64_t Offset = First->getOperand(2).getImm(); + if (!isUInt<7>(Offset) || + !isAligned(Align(MMO->getSize().getValue()), Offset)) + return false; + MachineInstrBuilder MIB = BuildMI( + *MF, + First->getDebugLoc().get() ? First->getDebugLoc() : Second->getDebugLoc(), + TII->get(PairOpc)); + MIB.add(First->getOperand(0)) + .add(Second->getOperand(0)) + .add(First->getOperand(1)) + .add(First->getOperand(2)) + .cloneMergedMemRefs({&*First, &*Second}); + + First->getParent()->insert(First, MIB); + + First->removeFromParent(); + Second->removeFromParent(); + + return true; +} + +static bool mayAlias(MachineInstr &MIa, + SmallVectorImpl &MemInsns, + AliasAnalysis *AA) { + for (MachineInstr *MIb : MemInsns) + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) + return true; + + return false; +} + +// Scan the instructions looking for a load/store that can be combined with the +// current instruction into a wider equivalent or a load/store pair. +// TODO: Extend pairing logic to consider reordering both instructions +// to a safe "middle" position rather than only merging forward/backward. +// This requires more sophisticated checks for aliasing, register +// liveness, and potential scheduling hazards. +MachineBasicBlock::iterator +RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, + bool &MergeForward) { + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator MBBI = I; + MachineInstr &FirstMI = *I; + MBBI = next_nodbg(MBBI, E); + + bool MayLoad = FirstMI.mayLoad(); + Register Reg = FirstMI.getOperand(0).getReg(); + Register BaseReg = FirstMI.getOperand(1).getReg(); + int Offset = FirstMI.getOperand(2).getImm(); + int OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue(); + + MergeForward = false; + + // Track which register units have been modified and used between the first + // insn (inclusive) and the second insn. + ModifiedRegUnits.clear(); + UsedRegUnits.clear(); + + // Remember any instructions that read/write memory between FirstMI and MI. 
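(Illustrative aside, not part of the patch: the scan below gives up, for example, when the shared base register is redefined between the two candidate accesses:

    lw a1, 0(a0)
    addi a0, a0, 16    # base clobbered, search returns end()
    lw a2, 4(a0)

and it similarly stops at calls; intervening memory operations that may alias only block merging with that particular candidate, and the search keeps looking.)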
+ SmallVector MemInsns; + + for (unsigned Count = 0; MBBI != E && Count < 128; + MBBI = next_nodbg(MBBI, E)) { + MachineInstr &MI = *MBBI; + + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) + ++Count; + + if (MI.getOpcode() == FirstMI.getOpcode() && + TII->isLdStSafeToPair(MI, TRI)) { + Register MIBaseReg = MI.getOperand(1).getReg(); + int MIOffset = MI.getOperand(2).getImm(); + + if (BaseReg == MIBaseReg) { + + if ((Offset != MIOffset + OffsetStride) && + (Offset + OffsetStride != MIOffset)) { + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, + TRI); + MemInsns.push_back(&MI); + continue; + } + + // If the destination register of one load is the same register or a + // sub/super register of the other load, bail and keep looking. + if (MayLoad && + TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) { + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, + TRI); + MemInsns.push_back(&MI); + continue; + } + + // If the BaseReg has been modified, then we cannot do the optimization. + if (!ModifiedRegUnits.available(BaseReg)) + return E; + + // If the Rt of the second instruction was not modified or used between + // the two instructions and none of the instructions between the second + // and first alias with the second, we can combine the second into the + // first. + if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) && + !(MI.mayLoad() && + !UsedRegUnits.available(MI.getOperand(0).getReg())) && + !mayAlias(MI, MemInsns, AA)) { + + MergeForward = false; + return MBBI; + } + + // Likewise, if the Rt of the first instruction is not modified or used + // between the two instructions and none of the instructions between the + // first and the second alias with the first, we can combine the first + // into the second. + if (!(MayLoad && + !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) && + !mayAlias(FirstMI, MemInsns, AA)) { + + if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) { + MergeForward = true; + return MBBI; + } + } + // Unable to combine these instructions due to interference in between. + // Keep looking. + } + } + + // If the instruction wasn't a matching load or store. Stop searching if we + // encounter a call instruction that might modify memory. + if (MI.isCall()) + return E; + + // Update modified / uses register units. + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); + + // Otherwise, if the base register is modified, we have no match, so + // return early. + if (!ModifiedRegUnits.available(BaseReg)) + return E; + + // Update list of instructions that read/write memory. + if (MI.mayLoadOrStore()) + MemInsns.push_back(&MI); + } + return E; +} + +MachineBasicBlock::iterator __attribute__((noinline)) +RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, + bool MergeForward) { + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); + if (NextI == Paired) + NextI = next_nodbg(NextI, E); + + // Insert our new paired instruction after whichever of the paired + // instructions MergeForward indicates. + MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; + MachineBasicBlock::iterator DeletionPoint = MergeForward ? 
I : Paired; + int Offset = I->getOperand(2).getImm(); + int PairedOffset = Paired->getOperand(2).getImm(); + bool InsertAfter = (Offset < PairedOffset) ^ MergeForward; + + if (!MergeForward) + Paired->getOperand(1).setIsKill(false); + + // Kill flags may become invalid when moving stores for pairing. + if (I->getOperand(0).isUse()) { + if (!MergeForward) { + // Clear kill flags on store if moving upwards. + I->getOperand(0).setIsKill(false); + Paired->getOperand(0).setIsKill(false); + } else { + // Clear kill flags of the first stores register. + Register Reg = I->getOperand(0).getReg(); + for (MachineInstr &MI : make_range(std::next(I), std::next(Paired))) + MI.clearRegisterKills(Reg, TRI); + } + } + + MachineInstr *ToInsert = DeletionPoint->removeFromParent(); + MachineBasicBlock &MBB = *InsertionPoint->getParent(); + MachineBasicBlock::iterator First, Second; + + if (!InsertAfter) { + First = MBB.insert(InsertionPoint, ToInsert); + Second = InsertionPoint; + } else { + Second = MBB.insertAfter(InsertionPoint, ToInsert); + First = InsertionPoint; + } + + if (!tryConvertToLdStPair(First, Second)) + finalizeBundle(MBB, First.getInstrIterator(), + std::next(Second).getInstrIterator()); + + LLVM_DEBUG(dbgs() << "Bonding pair load/store:\n "); + LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs())); + return NextI; +} + +// Returns an instance of the Load / Store Optimization pass. +FunctionPass *llvm::createRISCVLoadStoreOptPass() { + return new RISCVLoadStoreOpt(); +} diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 61c7c21367036f1..110c7a65ae047ec 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -116,7 +116,9 @@ def MIPS_P8700 : RISCVProcessorModel<"mips-p8700", FeatureStdExtZba, FeatureStdExtZbb, FeatureStdExtZifencei, - FeatureStdExtZicsr], + FeatureStdExtZicsr, + FeatureVendorMIPSCMove, + FeatureVendorMIPSLoadStorePairs], [TuneMIPSP8700]>; def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32", diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 6e212dc58e6ddda..e6307086d93a314 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -62,6 +62,16 @@ static cl::opt RISCVMinimumJumpTableEntries( "riscv-min-jump-table-entries", cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on RISCV")); +static cl::opt UseLoadStorePairsOpt( + "riscv-load-store-pairs", + cl::desc("RISCV: Optimize for load-store bonding"), + cl::init(false), cl::Hidden); + +static cl::opt UseCCMovInsn( + "riscv-ccmov", + cl::desc("RISCV: Use 'ccmov' instruction"), + cl::init(true), cl::Hidden); + void RISCVSubtarget::anchor() {} RISCVSubtarget & @@ -238,3 +248,11 @@ void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy, Policy.OnlyBottomUp = false; } } + +bool RISCVSubtarget::useLoadStorePairs() const { + return UseLoadStorePairsOpt && HasMIPSLSP; +} + +bool RISCVSubtarget::useCCMovInsn() const { + return UseCCMovInsn && HasVendorMIPSCMove; +} diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 87d508c3941737d..8bec6edb324b14f 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -188,6 +188,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { unsigned getXLen() const { return is64Bit() ? 
64 : 32; } + bool useLoadStorePairs() const; + bool useCCMovInsn() const; unsigned getFLen() const { if (HasStdExtD) return 64; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index f6ccbfbe217df61..4c8cd97c698ffdf 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -143,6 +143,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVDAGToDAGISelLegacyPass(*PR); initializeRISCVMoveMergePass(*PR); initializeRISCVPushPopOptPass(*PR); + initializeRISCVLoadStoreOptPass(*PR); } static StringRef computeDataLayout(const Triple &TT, @@ -389,6 +390,13 @@ class RISCVPassConfig : public TargetPassConfig { DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); } + + const RISCVSubtarget &ST = C->MF->getSubtarget(); + if (!ST.getMacroFusions().empty() && ST.useLoadStorePairs()) { + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + } + return DAG; } @@ -548,6 +556,8 @@ void RISCVPassConfig::addPreSched2() { // Emit KCFI checks for indirect calls. addPass(createKCFIPass()); + if (TM->getOptLevel() != CodeGenOptLevel::None) + addPass(createRISCVLoadStoreOptPass()); } void RISCVPassConfig::addPreEmitPass() { @@ -561,6 +571,11 @@ void RISCVPassConfig::addPreEmitPass() { addPass(createMachineCopyPropagationPass(true)); addPass(&BranchRelaxationPassID); addPass(createRISCVMakeCompressibleOptPass()); + + // LoadStoreOptimizer creates bundles for load-store bonding. + addPass(createUnpackMachineBundles([](const MachineFunction &MF) { + return MF.getSubtarget().useLoadStorePairs(); + })); } void RISCVPassConfig::addPreEmitPass2() { diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index f60def9d546f810..5ee6c192b80291f 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -63,6 +63,7 @@ ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible +; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index b0c756e26985bb5..473c41109671b66 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -177,6 +177,7 @@ ; CHECK-NEXT: Post-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks +; CHECK-NEXT: RISCV Load / Store Optimizer ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: PostRA Machine Instruction Scheduler @@ -190,6 +191,7 @@ ; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible +; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll new file mode 100644 index 000000000000000..76649b831f266ae --- /dev/null +++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll @@ -0,0 +1,509 
@@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32D +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D +; RUN: llc -mtriple=riscv32 -mattr=+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I_PAIR +; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d,+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32D_PAIR +; RUN: llc -mtriple=riscv64 -mattr=+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I_PAIR +; RUN: llc -mtriple=riscv64 -mcpu mips-p8700 -mattr=+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64P_8700 +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d,+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D_PAIR +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D_8700 + +define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_addr #0 { +; RV32I-LABEL: testi: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset s2, -4 +; RV32I-NEXT: .cfi_offset s3, -8 +; RV32I-NEXT: .cfi_offset s4, -12 +; RV32I-NEXT: .cfi_offset s5, -16 +; RV32I-NEXT: lw s3, 0(a0) +; RV32I-NEXT: lw s2, 4(a0) +; RV32I-NEXT: lw s5, 8(a0) +; RV32I-NEXT: lw s4, 12(a0) +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32D-LABEL: testi: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: addi sp, sp, -16 +; RV32D-NEXT: .cfi_def_cfa_offset 16 +; RV32D-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32D-NEXT: .cfi_offset s2, -4 +; RV32D-NEXT: .cfi_offset s3, -8 +; RV32D-NEXT: .cfi_offset s4, -12 +; RV32D-NEXT: .cfi_offset s5, -16 +; RV32D-NEXT: lw s3, 0(a0) +; RV32D-NEXT: lw s2, 4(a0) +; RV32D-NEXT: lw s5, 8(a0) +; RV32D-NEXT: lw s4, 12(a0) +; RV32D-NEXT: #APP +; RV32D-NEXT: #NO_APP +; RV32D-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32D-NEXT: .cfi_restore s2 +; RV32D-NEXT: .cfi_restore s3 +; RV32D-NEXT: .cfi_restore s4 +; RV32D-NEXT: .cfi_restore s5 
+; RV32D-NEXT: addi sp, sp, 16 +; RV32D-NEXT: .cfi_def_cfa_offset 0 +; RV32D-NEXT: ret +; +; RV64I-LABEL: testi: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset s2, -8 +; RV64I-NEXT: .cfi_offset s3, -16 +; RV64I-NEXT: .cfi_offset s4, -24 +; RV64I-NEXT: .cfi_offset s5, -32 +; RV64I-NEXT: ld s3, 0(a0) +; RV64I-NEXT: ld s2, 8(a0) +; RV64I-NEXT: ld s5, 16(a0) +; RV64I-NEXT: ld s4, 24(a0) +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64D-LABEL: testi: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: addi sp, sp, -32 +; RV64D-NEXT: .cfi_def_cfa_offset 32 +; RV64D-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64D-NEXT: .cfi_offset s2, -8 +; RV64D-NEXT: .cfi_offset s3, -16 +; RV64D-NEXT: .cfi_offset s4, -24 +; RV64D-NEXT: .cfi_offset s5, -32 +; RV64D-NEXT: ld s3, 0(a0) +; RV64D-NEXT: ld s2, 8(a0) +; RV64D-NEXT: ld s5, 16(a0) +; RV64D-NEXT: ld s4, 24(a0) +; RV64D-NEXT: #APP +; RV64D-NEXT: #NO_APP +; RV64D-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64D-NEXT: .cfi_restore s2 +; RV64D-NEXT: .cfi_restore s3 +; RV64D-NEXT: .cfi_restore s4 +; RV64D-NEXT: .cfi_restore s5 +; RV64D-NEXT: addi sp, sp, 32 +; RV64D-NEXT: .cfi_def_cfa_offset 0 +; RV64D-NEXT: ret +; +; RV32I_PAIR-LABEL: testi: +; RV32I_PAIR: # %bb.0: # %entry +; RV32I_PAIR-NEXT: addi sp, sp, -16 +; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 16 +; RV32I_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill +; RV32I_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill +; RV32I_PAIR-NEXT: .cfi_offset s2, -4 +; RV32I_PAIR-NEXT: .cfi_offset s3, -8 +; RV32I_PAIR-NEXT: .cfi_offset s4, -12 +; RV32I_PAIR-NEXT: .cfi_offset s5, -16 +; RV32I_PAIR-NEXT: lwp s3, s2, 0(a0) +; RV32I_PAIR-NEXT: lwp s5, s4, 8(a0) +; RV32I_PAIR-NEXT: #APP +; RV32I_PAIR-NEXT: #NO_APP +; RV32I_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload +; RV32I_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload +; RV32I_PAIR-NEXT: .cfi_restore s2 +; RV32I_PAIR-NEXT: .cfi_restore s3 +; RV32I_PAIR-NEXT: .cfi_restore s4 +; RV32I_PAIR-NEXT: .cfi_restore s5 +; RV32I_PAIR-NEXT: addi sp, sp, 16 +; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV32I_PAIR-NEXT: ret +; +; RV32D_PAIR-LABEL: testi: +; RV32D_PAIR: # %bb.0: # %entry +; RV32D_PAIR-NEXT: addi sp, sp, -16 +; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 16 +; RV32D_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill +; RV32D_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill +; RV32D_PAIR-NEXT: .cfi_offset s2, -4 +; RV32D_PAIR-NEXT: .cfi_offset s3, -8 +; RV32D_PAIR-NEXT: .cfi_offset s4, -12 +; RV32D_PAIR-NEXT: .cfi_offset s5, -16 +; RV32D_PAIR-NEXT: lwp s3, s2, 0(a0) +; 
RV32D_PAIR-NEXT: lwp s5, s4, 8(a0) +; RV32D_PAIR-NEXT: #APP +; RV32D_PAIR-NEXT: #NO_APP +; RV32D_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload +; RV32D_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload +; RV32D_PAIR-NEXT: .cfi_restore s2 +; RV32D_PAIR-NEXT: .cfi_restore s3 +; RV32D_PAIR-NEXT: .cfi_restore s4 +; RV32D_PAIR-NEXT: .cfi_restore s5 +; RV32D_PAIR-NEXT: addi sp, sp, 16 +; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV32D_PAIR-NEXT: ret +; +; RV64I_PAIR-LABEL: testi: +; RV64I_PAIR: # %bb.0: # %entry +; RV64I_PAIR-NEXT: addi sp, sp, -32 +; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 32 +; RV64I_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill +; RV64I_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill +; RV64I_PAIR-NEXT: .cfi_offset s2, -8 +; RV64I_PAIR-NEXT: .cfi_offset s3, -16 +; RV64I_PAIR-NEXT: .cfi_offset s4, -24 +; RV64I_PAIR-NEXT: .cfi_offset s5, -32 +; RV64I_PAIR-NEXT: ld s3, 0(a0) +; RV64I_PAIR-NEXT: ld s2, 8(a0) +; RV64I_PAIR-NEXT: ld s5, 16(a0) +; RV64I_PAIR-NEXT: ld s4, 24(a0) +; RV64I_PAIR-NEXT: #APP +; RV64I_PAIR-NEXT: #NO_APP +; RV64I_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload +; RV64I_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload +; RV64I_PAIR-NEXT: .cfi_restore s2 +; RV64I_PAIR-NEXT: .cfi_restore s3 +; RV64I_PAIR-NEXT: .cfi_restore s4 +; RV64I_PAIR-NEXT: .cfi_restore s5 +; RV64I_PAIR-NEXT: addi sp, sp, 32 +; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV64I_PAIR-NEXT: ret +; +; RV64P_8700-LABEL: testi: +; RV64P_8700: # %bb.0: # %entry +; RV64P_8700-NEXT: addi sp, sp, -32 +; RV64P_8700-NEXT: .cfi_def_cfa_offset 32 +; RV64P_8700-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill +; RV64P_8700-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill +; RV64P_8700-NEXT: .cfi_offset s2, -8 +; RV64P_8700-NEXT: .cfi_offset s3, -16 +; RV64P_8700-NEXT: .cfi_offset s4, -24 +; RV64P_8700-NEXT: .cfi_offset s5, -32 +; RV64P_8700-NEXT: ld s3, 0(a0) +; RV64P_8700-NEXT: ld s2, 8(a0) +; RV64P_8700-NEXT: ld s5, 16(a0) +; RV64P_8700-NEXT: ld s4, 24(a0) +; RV64P_8700-NEXT: #APP +; RV64P_8700-NEXT: #NO_APP +; RV64P_8700-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload +; RV64P_8700-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload +; RV64P_8700-NEXT: .cfi_restore s2 +; RV64P_8700-NEXT: .cfi_restore s3 +; RV64P_8700-NEXT: .cfi_restore s4 +; RV64P_8700-NEXT: .cfi_restore s5 +; RV64P_8700-NEXT: addi sp, sp, 32 +; RV64P_8700-NEXT: .cfi_def_cfa_offset 0 +; RV64P_8700-NEXT: ret +; +; RV64D_PAIR-LABEL: testi: +; RV64D_PAIR: # %bb.0: # %entry +; RV64D_PAIR-NEXT: addi sp, sp, -32 +; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 32 +; RV64D_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill +; RV64D_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill +; RV64D_PAIR-NEXT: .cfi_offset s2, -8 +; RV64D_PAIR-NEXT: .cfi_offset s3, -16 +; RV64D_PAIR-NEXT: .cfi_offset s4, -24 +; RV64D_PAIR-NEXT: .cfi_offset s5, -32 +; RV64D_PAIR-NEXT: ld s3, 0(a0) +; RV64D_PAIR-NEXT: ld s2, 8(a0) +; RV64D_PAIR-NEXT: ld s5, 16(a0) +; RV64D_PAIR-NEXT: ld s4, 24(a0) +; RV64D_PAIR-NEXT: #APP +; RV64D_PAIR-NEXT: #NO_APP +; RV64D_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload +; RV64D_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload +; RV64D_PAIR-NEXT: .cfi_restore s2 +; RV64D_PAIR-NEXT: .cfi_restore s3 +; RV64D_PAIR-NEXT: .cfi_restore s4 +; RV64D_PAIR-NEXT: .cfi_restore s5 +; RV64D_PAIR-NEXT: addi sp, sp, 32 +; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV64D_PAIR-NEXT: ret +; +; RV64D_8700-LABEL: testi: +; RV64D_8700: # %bb.0: # %entry +; RV64D_8700-NEXT: addi sp, sp, -32 +; RV64D_8700-NEXT: .cfi_def_cfa_offset 32 +; 
RV64D_8700-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: .cfi_offset s2, -8 +; RV64D_8700-NEXT: .cfi_offset s3, -16 +; RV64D_8700-NEXT: .cfi_offset s4, -24 +; RV64D_8700-NEXT: .cfi_offset s5, -32 +; RV64D_8700-NEXT: ld s3, 0(a0) +; RV64D_8700-NEXT: ld s2, 8(a0) +; RV64D_8700-NEXT: ld s5, 16(a0) +; RV64D_8700-NEXT: ld s4, 24(a0) +; RV64D_8700-NEXT: #APP +; RV64D_8700-NEXT: #NO_APP +; RV64D_8700-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: .cfi_restore s2 +; RV64D_8700-NEXT: .cfi_restore s3 +; RV64D_8700-NEXT: .cfi_restore s4 +; RV64D_8700-NEXT: .cfi_restore s5 +; RV64D_8700-NEXT: addi sp, sp, 32 +; RV64D_8700-NEXT: .cfi_def_cfa_offset 0 +; RV64D_8700-NEXT: ret +entry: + %arrayidx = getelementptr inbounds i8*, i8** %a, i64 1 + %0 = load i8*, i8** %arrayidx, align 8 + %1 = load i8*, i8** %a, align 8 + %arrayidx2 = getelementptr inbounds i8*, i8** %a, i64 3 + %2 = load i8*, i8** %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds i8*, i8** %a, i64 2 + %3 = load i8*, i8** %arrayidx3, align 8 + tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(i8* %0, i8* %1, i8* %2, i8* %3) + ret void +} + + +define dso_local void @testf(float* nocapture noundef readonly %a) local_unnamed_addr #0 { +; RV32I-LABEL: testf: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) +; RV32I-NEXT: lw a2, 8(a0) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: tail sinkf +; +; RV32D-LABEL: testf: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: flw fa3, 0(a0) +; RV32D-NEXT: flw fa0, 4(a0) +; RV32D-NEXT: flw fa2, 8(a0) +; RV32D-NEXT: flw fa1, 12(a0) +; RV32D-NEXT: tail sinkf +; +; RV64I-LABEL: testf: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lw a3, 0(a0) +; RV64I-NEXT: lw a4, 4(a0) +; RV64I-NEXT: lw a2, 8(a0) +; RV64I-NEXT: lw a1, 12(a0) +; RV64I-NEXT: mv a0, a4 +; RV64I-NEXT: tail sinkf +; +; RV64D-LABEL: testf: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: flw fa3, 0(a0) +; RV64D-NEXT: flw fa0, 4(a0) +; RV64D-NEXT: flw fa2, 8(a0) +; RV64D-NEXT: flw fa1, 12(a0) +; RV64D-NEXT: tail sinkf +; +; RV32I_PAIR-LABEL: testf: +; RV32I_PAIR: # %bb.0: # %entry +; RV32I_PAIR-NEXT: lw a3, 0(a0) +; RV32I_PAIR-NEXT: lw a4, 4(a0) +; RV32I_PAIR-NEXT: lw a2, 8(a0) +; RV32I_PAIR-NEXT: lw a1, 12(a0) +; RV32I_PAIR-NEXT: mv a0, a4 +; RV32I_PAIR-NEXT: tail sinkf +; +; RV32D_PAIR-LABEL: testf: +; RV32D_PAIR: # %bb.0: # %entry +; RV32D_PAIR-NEXT: flw fa3, 0(a0) +; RV32D_PAIR-NEXT: flw fa0, 4(a0) +; RV32D_PAIR-NEXT: flw fa2, 8(a0) +; RV32D_PAIR-NEXT: flw fa1, 12(a0) +; RV32D_PAIR-NEXT: tail sinkf +; +; RV64I_PAIR-LABEL: testf: +; RV64I_PAIR: # %bb.0: # %entry +; RV64I_PAIR-NEXT: lw a3, 0(a0) +; RV64I_PAIR-NEXT: lw a4, 4(a0) +; RV64I_PAIR-NEXT: lw a2, 8(a0) +; RV64I_PAIR-NEXT: lw a1, 12(a0) +; RV64I_PAIR-NEXT: mv a0, a4 +; RV64I_PAIR-NEXT: tail sinkf +; +; RV64P_8700-LABEL: testf: +; RV64P_8700: # %bb.0: # %entry +; RV64P_8700-NEXT: flw fa3, 0(a0) +; RV64P_8700-NEXT: flw fa0, 4(a0) +; RV64P_8700-NEXT: flw fa2, 8(a0) +; RV64P_8700-NEXT: flw fa1, 12(a0) +; RV64P_8700-NEXT: tail sinkf +; +; RV64D_PAIR-LABEL: testf: +; RV64D_PAIR: # %bb.0: # %entry +; RV64D_PAIR-NEXT: flw fa3, 0(a0) +; RV64D_PAIR-NEXT: flw fa0, 4(a0) +; 
RV64D_PAIR-NEXT: flw fa2, 8(a0) +; RV64D_PAIR-NEXT: flw fa1, 12(a0) +; RV64D_PAIR-NEXT: tail sinkf +; +; RV64D_8700-LABEL: testf: +; RV64D_8700: # %bb.0: # %entry +; RV64D_8700-NEXT: flw fa3, 0(a0) +; RV64D_8700-NEXT: flw fa0, 4(a0) +; RV64D_8700-NEXT: flw fa2, 8(a0) +; RV64D_8700-NEXT: flw fa1, 12(a0) +; RV64D_8700-NEXT: tail sinkf +entry: + %arrayidx = getelementptr inbounds float, float* %a, i64 1 + %0 = load float, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %a, i64 3 + %1 = load float, float* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float, float* %a, i64 2 + %2 = load float, float* %arrayidx2, align 4 + %3 = load float, float* %a, align 4 + tail call void @sinkf(float noundef %0, float noundef %1, float noundef %2, float noundef %3) + ret void +} + +declare dso_local void @sinkf(float noundef, float noundef, float noundef, float noundef) local_unnamed_addr + +define dso_local void @testd(double* nocapture noundef readonly %a) local_unnamed_addr #0 { +; RV32I-LABEL: testd: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lw a4, 16(a0) +; RV32I-NEXT: lw a5, 20(a0) +; RV32I-NEXT: lw a2, 24(a0) +; RV32I-NEXT: lw a3, 28(a0) +; RV32I-NEXT: lw a6, 0(a0) +; RV32I-NEXT: lw a7, 4(a0) +; RV32I-NEXT: lw t0, 8(a0) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: mv a0, t0 +; RV32I-NEXT: tail sinkd +; +; RV32D-LABEL: testd: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: fld fa3, 0(a0) +; RV32D-NEXT: fld fa0, 8(a0) +; RV32D-NEXT: fld fa2, 16(a0) +; RV32D-NEXT: fld fa1, 24(a0) +; RV32D-NEXT: tail sinkd +; +; RV64I-LABEL: testd: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 8(a0) +; RV64I-NEXT: ld a2, 16(a0) +; RV64I-NEXT: ld a1, 24(a0) +; RV64I-NEXT: mv a0, a4 +; RV64I-NEXT: tail sinkd +; +; RV64D-LABEL: testd: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: fld fa3, 0(a0) +; RV64D-NEXT: fld fa0, 8(a0) +; RV64D-NEXT: fld fa2, 16(a0) +; RV64D-NEXT: fld fa1, 24(a0) +; RV64D-NEXT: tail sinkd +; +; RV32I_PAIR-LABEL: testd: +; RV32I_PAIR: # %bb.0: # %entry +; RV32I_PAIR-NEXT: lwp a4, a5, 16(a0) +; RV32I_PAIR-NEXT: lwp a2, a3, 24(a0) +; RV32I_PAIR-NEXT: lwp a6, a7, 0(a0) +; RV32I_PAIR-NEXT: lwp a0, a1, 8(a0) +; RV32I_PAIR-NEXT: tail sinkd +; +; RV32D_PAIR-LABEL: testd: +; RV32D_PAIR: # %bb.0: # %entry +; RV32D_PAIR-NEXT: fld fa3, 0(a0) +; RV32D_PAIR-NEXT: fld fa0, 8(a0) +; RV32D_PAIR-NEXT: fld fa2, 16(a0) +; RV32D_PAIR-NEXT: fld fa1, 24(a0) +; RV32D_PAIR-NEXT: tail sinkd +; +; RV64I_PAIR-LABEL: testd: +; RV64I_PAIR: # %bb.0: # %entry +; RV64I_PAIR-NEXT: ld a3, 0(a0) +; RV64I_PAIR-NEXT: ld a4, 8(a0) +; RV64I_PAIR-NEXT: ld a2, 16(a0) +; RV64I_PAIR-NEXT: ld a1, 24(a0) +; RV64I_PAIR-NEXT: mv a0, a4 +; RV64I_PAIR-NEXT: tail sinkd +; +; RV64P_8700-LABEL: testd: +; RV64P_8700: # %bb.0: # %entry +; RV64P_8700-NEXT: fld fa3, 0(a0) +; RV64P_8700-NEXT: fld fa0, 8(a0) +; RV64P_8700-NEXT: fld fa2, 16(a0) +; RV64P_8700-NEXT: fld fa1, 24(a0) +; RV64P_8700-NEXT: tail sinkd +; +; RV64D_PAIR-LABEL: testd: +; RV64D_PAIR: # %bb.0: # %entry +; RV64D_PAIR-NEXT: fld fa3, 0(a0) +; RV64D_PAIR-NEXT: fld fa0, 8(a0) +; RV64D_PAIR-NEXT: fld fa2, 16(a0) +; RV64D_PAIR-NEXT: fld fa1, 24(a0) +; RV64D_PAIR-NEXT: tail sinkd +; +; RV64D_8700-LABEL: testd: +; RV64D_8700: # %bb.0: # %entry +; RV64D_8700-NEXT: fld fa3, 0(a0) +; RV64D_8700-NEXT: fld fa0, 8(a0) +; RV64D_8700-NEXT: fld fa2, 16(a0) +; RV64D_8700-NEXT: fld fa1, 24(a0) +; RV64D_8700-NEXT: tail sinkd +entry: + %arrayidx = getelementptr inbounds double, double* %a, i64 1 + %0 = load double, double* %arrayidx, align 8 + 
%arrayidx1 = getelementptr inbounds double, double* %a, i64 3 + %1 = load double, double* %arrayidx1, align 8 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 2 + %2 = load double, double* %arrayidx2, align 8 + %3 = load double, double* %a, align 8 + tail call void @sinkd(double noundef %0, double noundef %1, double noundef %2, double noundef %3) + ret void +} + +declare dso_local void @sinkd(double noundef, double noundef, double noundef, double noundef) local_unnamed_addr diff --git a/llvm/test/CodeGen/RISCV/select-and.ll b/llvm/test/CodeGen/RISCV/select-and.ll index d305993f0e966b7..56f6285ab055771 100644 --- a/llvm/test/CodeGen/RISCV/select-and.ll +++ b/llvm/test/CodeGen/RISCV/select-and.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s ;; There are a few different ways to lower (select (and A, B), X, Y). This test ;; ensures that we do so with as few branches as possible. @@ -27,6 +29,12 @@ define signext i32 @select_of_and(i1 zeroext %a, i1 zeroext %b, i32 signext %c, ; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_of_and: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: and a0, a0, a1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3 +; RV64I-CCMOV-NEXT: ret %1 = and i1 %a, %b %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 @@ -69,6 +77,23 @@ define signext i32 @if_of_and(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: if_of_and: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: addi sp, sp, -16 +; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: beqz a0, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.1: +; RV64I-CCMOV-NEXT: beqz a1, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.2: # %if.then +; RV64I-CCMOV-NEXT: call both +; RV64I-CCMOV-NEXT: j .LBB1_4 +; RV64I-CCMOV-NEXT: .LBB1_3: # %if.else +; RV64I-CCMOV-NEXT: call neither +; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end +; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-CCMOV-NEXT: addi sp, sp, 16 +; RV64I-CCMOV-NEXT: ret %1 = and i1 %a, %b br i1 %1, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/RISCV/select-bare.ll b/llvm/test/CodeGen/RISCV/select-bare.ll index cf8fe96742bfbda..4b49a626e28bee8 100644 --- a/llvm/test/CodeGen/RISCV/select-bare.ll +++ b/llvm/test/CodeGen/RISCV/select-bare.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-LABEL: bare_select: @@ -12,6 +14,12 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: ret +; +; RV64I-CCMOV-LABEL: bare_select: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: andi a0, a0, 1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %1 = select i1 %a, i32 %b, i32 %c ret i32 %1 } @@ -26,6 +34,12 @@ define float @bare_select_float(i1 %a, float %b, float %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB1_2: ; RV32I-NEXT: ret +; +; RV64I-CCMOV-LABEL: 
bare_select_float: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: andi a0, a0, 1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %1 = select i1 %a, float %b, float %c ret float %1 } diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll index 31e25702da8ba27..17afbd724e35254 100644 --- a/llvm/test/CodeGen/RISCV/select-cc.ll +++ b/llvm/test/CodeGen/RISCV/select-cc.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck -check-prefixes=RV32I %s ; RUN: llc -mtriple=riscv64 -disable-block-placement -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s define signext i32 @foo(i32 signext %a, ptr %b) nounwind { ; RV32I-LABEL: foo: @@ -156,6 +158,57 @@ define signext i32 @foo(i32 signext %a, ptr %b) nounwind { ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: .LBB0_28: ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: foo: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: lw a5, 0(a1) +; RV64I-CCMOV-NEXT: xor a6, a0, a2 +; RV64I-CCMOV-NEXT: ccmov a0, a6, a2, a0 +; RV64I-CCMOV-NEXT: xor a2, a0, a3 +; RV64I-CCMOV-NEXT: ccmov a0, a2, a0, a3 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sltu a3, a4, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a3, a0, a4 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sltu a4, a0, a5 +; RV64I-CCMOV-NEXT: ccmov a0, a4, a5, a0 +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: sltu a5, a0, a2 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sltu a5, a3, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a4, a5 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a4 +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a5, a2 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a2, a0 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a5, a3 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a3 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a4, a5 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a4, a0 +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: slti a5, a2, 1 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2 +; RV64I-CCMOV-NEXT: slti a5, a2, 0 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0 +; RV64I-CCMOV-NEXT: lw a1, 0(a1) +; RV64I-CCMOV-NEXT: slti a3, a4, 1025 +; RV64I-CCMOV-NEXT: ccmov a0, a3, a4, a0 +; RV64I-CCMOV-NEXT: sltiu a2, a2, 2047 +; RV64I-CCMOV-NEXT: ccmov a0, a2, a1, a0 +; RV64I-CCMOV-NEXT: sext.w a0, a0 +; RV64I-CCMOV-NEXT: ret %val1 = load volatile i32, ptr %b %tst1 = icmp eq i32 %a, %val1 %val2 = select i1 %tst1, i32 %a, i32 %val1 @@ -258,6 +311,23 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2, ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: numsignbits: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: addi sp, sp, -16 +; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: ccmov s0, a0, a2, a3 +; RV64I-CCMOV-NEXT: beqz a1, .LBB1_2 +; RV64I-CCMOV-NEXT: # %bb.1: +; RV64I-CCMOV-NEXT: mv a0, s0 +; RV64I-CCMOV-NEXT: call bar +; RV64I-CCMOV-NEXT: .LBB1_2: +; RV64I-CCMOV-NEXT: mv a0, s0 +; 
RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-CCMOV-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-CCMOV-NEXT: addi sp, sp, 16 +; RV64I-CCMOV-NEXT: ret %5 = icmp eq i16 %0, 0 %6 = select i1 %5, i16 %3, i16 %2 %7 = icmp eq i16 %1, 0 @@ -295,6 +365,14 @@ define i32 @select_sge_int16min(i32 signext %x, i32 signext %y, i32 signext %z) ; RV64I-NEXT: .LBB2_2: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_sge_int16min: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: lui a3, 1048560 +; RV64I-CCMOV-NEXT: addiw a3, a3, -1 +; RV64I-CCMOV-NEXT: slt a0, a3, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %a = icmp sge i32 %x, -65536 %b = select i1 %a, i32 %y, i32 %z ret i32 %b @@ -331,6 +409,14 @@ define i64 @select_sge_int32min(i64 %x, i64 %y, i64 %z) { ; RV64I-NEXT: .LBB3_2: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_sge_int32min: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: lui a3, 524288 +; RV64I-CCMOV-NEXT: addi a3, a3, -1 +; RV64I-CCMOV-NEXT: slt a0, a3, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %a = icmp sge i64 %x, -2147483648 %b = select i1 %a, i64 %y, i64 %z ret i64 %b diff --git a/llvm/test/CodeGen/RISCV/select-or.ll b/llvm/test/CodeGen/RISCV/select-or.ll index 20a5ec15290cdb0..b3e68a9282533eb 100644 --- a/llvm/test/CodeGen/RISCV/select-or.ll +++ b/llvm/test/CodeGen/RISCV/select-or.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s ;; There are a few different ways to lower (select (or A, B), X, Y). This test ;; ensures that we do so with as few branches as possible. @@ -27,6 +29,12 @@ define signext i32 @select_of_or(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i ; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_of_or: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: or a0, a0, a1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3 +; RV64I-CCMOV-NEXT: ret %1 = or i1 %a, %b %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 @@ -69,6 +77,23 @@ define signext i32 @if_of_or(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: if_of_or: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: addi sp, sp, -16 +; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: bnez a0, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.1: +; RV64I-CCMOV-NEXT: bnez a1, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.2: # %if.else +; RV64I-CCMOV-NEXT: call neither +; RV64I-CCMOV-NEXT: j .LBB1_4 +; RV64I-CCMOV-NEXT: .LBB1_3: # %if.then +; RV64I-CCMOV-NEXT: call either +; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end +; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-CCMOV-NEXT: addi sp, sp, 16 +; RV64I-CCMOV-NEXT: ret %1 = or i1 %a, %b br i1 %1, label %if.then, label %if.else