diff --git a/llvm/include/llvm/CodeGen/BranchFolding.h b/llvm/include/llvm/CodeGen/BranchFolding.h
new file mode 100644
index 00000000000000..f29071bcfcd869
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BranchFolding.h
@@ -0,0 +1,190 @@
+//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineLoopInfo;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class MBFIWrapper;
+class ProfileSummaryInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+class LLVM_LIBRARY_VISIBILITY BranchFolder {
+public:
+  explicit BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
+                        MBFIWrapper &FreqInfo,
+                        const MachineBranchProbabilityInfo &ProbInfo,
+                        ProfileSummaryInfo *PSI,
+                        // Min tail length to merge. Defaults to commandline
+                        // flag. Ignored for optsize.
+                        unsigned MinTailLength = 0);
+
+  /// Perform branch folding, tail merging and other CFG optimizations on the
+  /// given function. Block placement changes the layout and may create new
+  /// tail merging opportunities.
+  bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
+                        const TargetRegisterInfo *tri,
+                        MachineLoopInfo *mli = nullptr,
+                        bool AfterPlacement = false);
+
+private:
+  class MergePotentialsElt {
+    unsigned Hash;
+    MachineBasicBlock *Block;
+
+  public:
+    MergePotentialsElt(unsigned h, MachineBasicBlock *b) : Hash(h), Block(b) {}
+
+    unsigned getHash() const { return Hash; }
+    MachineBasicBlock *getBlock() const { return Block; }
+
+    void setBlock(MachineBasicBlock *MBB) { Block = MBB; }
+
+    bool operator<(const MergePotentialsElt &) const;
+  };
+
+  using MPIterator = std::vector<MergePotentialsElt>::iterator;
+
+  std::vector<MergePotentialsElt> MergePotentials;
+  SmallPtrSet<const MachineBasicBlock *, 16> TriedMerging;
+  DenseMap<const MachineBasicBlock *, int> EHScopeMembership;
+
+  class SameTailElt {
+    MPIterator MPIter;
+    MachineBasicBlock::iterator TailStartPos;
+
+  public:
+    SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+        : MPIter(mp), TailStartPos(tsp) {}
+
+    MPIterator getMPIter() const { return MPIter; }
+
+    MergePotentialsElt &getMergePotentialsElt() const { return *getMPIter(); }
+
+    MachineBasicBlock::iterator getTailStartPos() const { return TailStartPos; }
+
+    unsigned getHash() const { return getMergePotentialsElt().getHash(); }
+
+    MachineBasicBlock *getBlock() const {
+      return getMergePotentialsElt().getBlock();
+    }
+
+    bool tailIsWholeBlock() const {
+      return TailStartPos == getBlock()->begin();
+    }
+
+    void setBlock(MachineBasicBlock *MBB) {
+      getMergePotentialsElt().setBlock(MBB);
+    }
+
+    void setTailStartPos(MachineBasicBlock::iterator Pos) {
+      TailStartPos = Pos;
+    }
+  };
+  std::vector<SameTailElt> SameTails;
+
+  bool AfterBlockPlacement;
+  bool EnableTailMerge;
+  bool EnableHoistCommonCode;
+  bool UpdateLiveIns;
+  unsigned MinCommonTailLength;
+  const TargetInstrInfo *TII;
+  const MachineRegisterInfo *MRI;
+  const TargetRegisterInfo *TRI;
+  MachineLoopInfo *MLI;
+  LivePhysRegs LiveRegs;
+
+private:
+  MBFIWrapper &MBBFreqInfo;
+  const MachineBranchProbabilityInfo &MBPI;
+  ProfileSummaryInfo *PSI;
+
+  bool TailMergeBlocks(MachineFunction &MF);
+  bool TryTailMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB,
+                          unsigned MinCommonTailLength);
+  void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
+
+  /// Delete the instruction OldInst and everything after it, replacing it
+  /// with an unconditional branch to NewDest.
+  void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                               MachineBasicBlock &NewDest);
+
+  /// Given a machine basic block and an iterator into it, split the MBB so
+  /// that the part before the iterator falls into the part starting at the
+  /// iterator. This returns the new MBB.
+  MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+                                MachineBasicBlock::iterator BBI1,
+                                const BasicBlock *BB);
+
+  /// Look through all the blocks in MergePotentials that have hash CurHash
+  /// (guaranteed to match the last element). Build the vector SameTails of
+  /// all those that have the (same) largest number of instructions in common
+  /// of any pair of these blocks. SameTails entries contain an iterator into
+  /// MergePotentials (from which the MachineBasicBlock can be found) and a
+  /// MachineBasicBlock::iterator into that MBB indicating the instruction
+  /// where the matching code sequence begins. Order of elements in SameTails
+  /// is the reverse of the order in which those blocks appear in
+  /// MergePotentials (where they are not necessarily consecutive).
+  unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+                            MachineBasicBlock *SuccBB,
+                            MachineBasicBlock *PredBB);
+
+  /// Remove all blocks with hash CurHash from MergePotentials, restoring
+  /// branches at ends of blocks as appropriate.
+  void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB,
+                            MachineBasicBlock *PredBB);
+
+  /// None of the blocks to be tail-merged consist only of the common tail.
+  /// Create a block that does by splitting one.
+  bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+                                 MachineBasicBlock *SuccBB,
+                                 unsigned maxCommonTailLength,
+                                 unsigned &commonTailIndex);
+
+  /// Create merged DebugLocs of identical instructions across SameTails and
+  /// assign it to the instruction in common tail; merge MMOs and undef flags.
+  void mergeCommonTails(unsigned commonTailIndex);
+
+  bool OptimizeBranches(MachineFunction &MF);
+
+  /// Analyze and optimize control flow related to the specified block. This
+  /// is never called on the entry block.
+  bool OptimizeBlock(MachineBasicBlock *MBB);
+
+  /// Remove the specified dead machine basic block from the function,
+  /// updating the CFG.
+  void RemoveDeadBlock(MachineBasicBlock *MBB);
+
+  /// Hoist common instruction sequences at the start of basic blocks to their
+  /// common predecessor.
+  bool HoistCommonCode(MachineFunction &MF);
+
+  /// If the successors of MBB have a common instruction sequence at their
+  /// start, move those instructions to before the MBB terminator if legal.
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_BRANCHFOLDING_H diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 65e7e92fe15213..d872209b13584b 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -16,7 +16,7 @@ // //===----------------------------------------------------------------------===// -#include "BranchFolding.h" +#include "llvm/CodeGen/BranchFolding.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h deleted file mode 100644 index 2a4ea92a92aa60..00000000000000 --- a/llvm/lib/CodeGen/BranchFolding.h +++ /dev/null @@ -1,202 +0,0 @@ -//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H -#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Support/Compiler.h" -#include -#include - -namespace llvm { - -class BasicBlock; -class MachineBranchProbabilityInfo; -class MachineFunction; -class MachineLoopInfo; -class MachineModuleInfo; -class MachineRegisterInfo; -class MBFIWrapper; -class ProfileSummaryInfo; -class TargetInstrInfo; -class TargetRegisterInfo; - - class LLVM_LIBRARY_VISIBILITY BranchFolder { - public: - explicit BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist, - MBFIWrapper &FreqInfo, - const MachineBranchProbabilityInfo &ProbInfo, - ProfileSummaryInfo *PSI, - // Min tail length to merge. Defaults to commandline - // flag. Ignored for optsize. - unsigned MinTailLength = 0); - - /// Perhaps branch folding, tail merging and other CFG optimizations on the - /// given function. Block placement changes the layout and may create new - /// tail merging opportunities. 
- bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, - const TargetRegisterInfo *tri, - MachineLoopInfo *mli = nullptr, - bool AfterPlacement = false); - - private: - class MergePotentialsElt { - unsigned Hash; - MachineBasicBlock *Block; - - public: - MergePotentialsElt(unsigned h, MachineBasicBlock *b) - : Hash(h), Block(b) {} - - unsigned getHash() const { return Hash; } - MachineBasicBlock *getBlock() const { return Block; } - - void setBlock(MachineBasicBlock *MBB) { - Block = MBB; - } - - bool operator<(const MergePotentialsElt &) const; - }; - - using MPIterator = std::vector::iterator; - - std::vector MergePotentials; - SmallPtrSet TriedMerging; - DenseMap EHScopeMembership; - - class SameTailElt { - MPIterator MPIter; - MachineBasicBlock::iterator TailStartPos; - - public: - SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) - : MPIter(mp), TailStartPos(tsp) {} - - MPIterator getMPIter() const { - return MPIter; - } - - MergePotentialsElt &getMergePotentialsElt() const { - return *getMPIter(); - } - - MachineBasicBlock::iterator getTailStartPos() const { - return TailStartPos; - } - - unsigned getHash() const { - return getMergePotentialsElt().getHash(); - } - - MachineBasicBlock *getBlock() const { - return getMergePotentialsElt().getBlock(); - } - - bool tailIsWholeBlock() const { - return TailStartPos == getBlock()->begin(); - } - - void setBlock(MachineBasicBlock *MBB) { - getMergePotentialsElt().setBlock(MBB); - } - - void setTailStartPos(MachineBasicBlock::iterator Pos) { - TailStartPos = Pos; - } - }; - std::vector SameTails; - - bool AfterBlockPlacement; - bool EnableTailMerge; - bool EnableHoistCommonCode; - bool UpdateLiveIns; - unsigned MinCommonTailLength; - const TargetInstrInfo *TII; - const MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - MachineLoopInfo *MLI; - LivePhysRegs LiveRegs; - - private: - MBFIWrapper &MBBFreqInfo; - const MachineBranchProbabilityInfo &MBPI; - ProfileSummaryInfo *PSI; - - bool TailMergeBlocks(MachineFunction &MF); - bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB, - unsigned MinCommonTailLength); - void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); - - /// Delete the instruction OldInst and everything after it, replacing it - /// with an unconditional branch to NewDest. - void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock &NewDest); - - /// Given a machine basic block and an iterator into it, split the MBB so - /// that the part before the iterator falls into the part starting at the - /// iterator. This returns the new MBB. - MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1, - const BasicBlock *BB); - - /// Look through all the blocks in MergePotentials that have hash CurHash - /// (guaranteed to match the last element). Build the vector SameTails of - /// all those that have the (same) largest number of instructions in common - /// of any pair of these blocks. SameTails entries contain an iterator into - /// MergePotentials (from which the MachineBasicBlock can be found) and a - /// MachineBasicBlock::iterator into that MBB indicating the instruction - /// where the matching code sequence begins. Order of elements in SameTails - /// is the reverse of the order in which those blocks appear in - /// MergePotentials (where they are not necessarily consecutive). 
- unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, - MachineBasicBlock *SuccBB, - MachineBasicBlock *PredBB); - - /// Remove all blocks with hash CurHash from MergePotentials, restoring - /// branches at ends of blocks as appropriate. - void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); - - /// None of the blocks to be tail-merged consist only of the common tail. - /// Create a block that does by splitting one. - bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - MachineBasicBlock *SuccBB, - unsigned maxCommonTailLength, - unsigned &commonTailIndex); - - /// Create merged DebugLocs of identical instructions across SameTails and - /// assign it to the instruction in common tail; merge MMOs and undef flags. - void mergeCommonTails(unsigned commonTailIndex); - - bool OptimizeBranches(MachineFunction &MF); - - /// Analyze and optimize control flow related to the specified block. This - /// is never called on the entry block. - bool OptimizeBlock(MachineBasicBlock *MBB); - - /// Remove the specified dead machine basic block from the function, - /// updating the CFG. - void RemoveDeadBlock(MachineBasicBlock *MBB); - - /// Hoist common instruction sequences at the start of basic blocks to their - /// common predecessor. - bool HoistCommonCode(MachineFunction &MF); - - /// If the successors of MBB has common instruction sequence at the start of - /// the function, move the instructions before MBB terminator if it's legal. - bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); - }; - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_BRANCHFOLDING_H diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 681e2f3dc848bd..31ffa8e8562026 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "BranchFolding.h" +#include "llvm/CodeGen/BranchFolding.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index f61142d202eb64..cac1863270bb1b 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -24,7 +24,6 @@ // //===----------------------------------------------------------------------===// -#include "BranchFolding.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -34,6 +33,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/BranchFolding.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" diff --git a/llvm/lib/Target/Mips/CMakeLists.txt b/llvm/lib/Target/Mips/CMakeLists.txt index 2208e93466465e..9b6dc60fab160b 100644 --- a/llvm/lib/Target/Mips/CMakeLists.txt +++ b/llvm/lib/Target/Mips/CMakeLists.txt @@ -59,9 +59,9 @@ add_llvm_target(MipsCodeGen MipsTargetMachine.cpp MipsTargetObjectFile.cpp MicroMipsSizeReduction.cpp - NanoMipsCompressJumpTables.cpp NanoMipsLoadStoreOptimizer.cpp NanoMipsMoveOptimizer.cpp + NanoMipsOptimizeJumpTables.cpp NanoMipsRegisterReAllocation.cpp NanoMipsRedundantCopyElimination.cpp NanoMipsTargetTransformInfo.cpp diff --git a/llvm/lib/Target/Mips/Mips.h 
b/llvm/lib/Target/Mips/Mips.h index 5305b05622f82a..a071b0f6e26f09 100644 --- a/llvm/lib/Target/Mips/Mips.h +++ b/llvm/lib/Target/Mips/Mips.h @@ -38,7 +38,7 @@ namespace llvm { FunctionPass *createMicroMipsSizeReducePass(); FunctionPass *createMipsExpandPseudoPass(); FunctionPass *createMipsPreLegalizeCombiner(); - FunctionPass *createNanoMipsCompressJumpTablesPass(); + FunctionPass *createNanoMipsOptimizeJumpTablesPass(); FunctionPass *createNanoMipsLoadStoreOptimizerPass(); FunctionPass *createNanoMipsMoveOptimizerPass(); FunctionPass *createNanoMipsRegisterReAllocationPass(); @@ -52,6 +52,7 @@ namespace llvm { void initializeMipsBranchExpansionPass(PassRegistry &); void initializeMicroMipsSizeReducePass(PassRegistry &); void initializeMipsPreLegalizerCombinerPass(PassRegistry&); + void initializeNMOptimizeJumpTablesPass (PassRegistry&); void initializeNanoMipsRegisterReAllocPass(PassRegistry &); void initializeRedundantCopyEliminationPass(PassRegistry&); } // end namespace llvm; diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp index ec5514c97ff999..1104a4f71f2593 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -60,6 +60,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() { initializeMipsBranchExpansionPass(*PR); initializeMicroMipsSizeReducePass(*PR); initializeMipsPreLegalizerCombinerPass(*PR); + initializeNMOptimizeJumpTablesPass(*PR); initializeRedundantCopyEliminationPass(*PR); } @@ -334,7 +335,7 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) { // machine code is emitted. void MipsPassConfig::addPreEmitPass() { if (getMipsSubtarget().hasNanoMips()) - addPass(createNanoMipsCompressJumpTablesPass()); + addPass(createNanoMipsOptimizeJumpTablesPass()); // Expand pseudo instructions that are sensitive to register allocation. addPass(createMipsExpandPseudoPass()); diff --git a/llvm/lib/Target/Mips/NanoMipsCompressJumpTables.cpp b/llvm/lib/Target/Mips/NanoMipsCompressJumpTables.cpp deleted file mode 100644 index 52672e10e4c9cb..00000000000000 --- a/llvm/lib/Target/Mips/NanoMipsCompressJumpTables.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===- NanoMipsCompressJumpTables.cpp - nanoMIPS compress JTs --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file This file contains a pass that compresses Jump Table entries, whenever -/// possible. Jump table entries used to be fixed size(4B). They used to -/// represent absolute addresses. We want to compress those entries by filling -/// them with specific offsets. Having offsets instead of absolute addresses -/// saves at least 2B per entry. This pass checks if one or two bytes are -/// sufficient for the offset value. 
-// -//===----------------------------------------------------------------------===// - -#include "Mips.h" -#include "MipsMachineFunction.h" -#include "MipsSubtarget.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" - -#include - -using namespace llvm; - -#define NM_COMPRESS_JUMP_TABLES_OPT_NAME \ - "nanoMIPS compress jump tables optimization pass" - -static cl::opt NMForce16BitJumpTables( - "nmips-force-16-bit-jump-table", cl::Hidden, cl::init(true), - cl::desc("Force all jump tables to have 16bit wide entries")); - -namespace { -struct NMCompressJumpTables : public MachineFunctionPass { - static char ID; - const MipsSubtarget *STI; - const TargetInstrInfo *TII; - MachineFunction *MF; - SmallVector BlockInfo; - SmallVector BrOffsets; - - int computeBlockSize(MachineBasicBlock &MBB); - void scanFunction(); - bool compressJumpTable(MachineInstr &MI, int Offset); - - NMCompressJumpTables() : MachineFunctionPass(ID) {} - StringRef getPassName() const override { - return NM_COMPRESS_JUMP_TABLES_OPT_NAME; - } - bool runOnMachineFunction(MachineFunction &Fn) override; -}; -} // namespace - -char NMCompressJumpTables::ID = 0; - -// TODO: Currently, there is no existing LLVM interface which we can use to tell the -// maximum potential size of a MachineInstr. Once we have it, this should be -// enhanced. -int NMCompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) { - int Size = 0; - for (const MachineInstr &MI : MBB) - Size += TII->getInstSizeInBytes(MI); - return Size; -} - -void NMCompressJumpTables::scanFunction() { - BlockInfo.clear(); - BlockInfo.resize(MF->getNumBlockIDs()); - BrOffsets.clear(); - bool findBR = MF->getJumpTableInfo() && - !MF->getJumpTableInfo()->getJumpTables().empty(); - if (findBR) - BrOffsets.resize(MF->getJumpTableInfo()->getJumpTables().size()); - int Offset = 0; - for (MachineBasicBlock &MBB : *MF) { - BlockInfo[MBB.getNumber()] = Offset; - Offset += computeBlockSize(MBB); - if (findBR) - for (auto &MI : MBB) { - if (MI.getOpcode() == Mips::BRSC_NM) { - int JTIdx = MI.getOperand(1).getIndex(); - BrOffsets[JTIdx] = Offset; - break; - } - } - } -} - -bool NMCompressJumpTables::compressJumpTable(MachineInstr &MI, int Offset) { - if (MI.getOpcode() != Mips::LoadJumpTableOffset) - return false; - - int JTIdx = MI.getOperand(3).getIndex(); - auto &JTInfo = *MF->getJumpTableInfo(); - const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx]; - - // The jump-table might have been optimized away. - if (JT.MBBs.empty()) - return false; - - int MaxOffset = std::numeric_limits::min(), - MinOffset = std::numeric_limits::max(); - int BrOffset = BrOffsets[JTIdx]; - - bool Signed = false; - for (auto Block : JT.MBBs) { - int BlockOffset = BlockInfo[Block->getNumber()]; - MaxOffset = std::max(MaxOffset, BlockOffset - BrOffset); - MinOffset = std::min(MinOffset, BlockOffset - BrOffset); - } - if (MinOffset < 0) - Signed = true; - - if (std::max(std::abs(MinOffset), MaxOffset) == MinOffset) - MaxOffset = MinOffset; - - auto MFI = MF->getInfo(); - MCSymbol *JTS = MFI->getJumpTableSymbol(JTIdx); - - if (NMForce16BitJumpTables) { - MFI->setJumpTableEntryInfo(JTIdx, 2, JTS, Signed); - return false; - } - - bool EntrySize1 = - (Signed && isInt<8>(MaxOffset)) || (!Signed && isUInt<8>(MaxOffset)); - bool EntrySize2 = - (Signed && isInt<16>(MaxOffset)) || (!Signed && isUInt<16>(MaxOffset)); - int EntrySize = EntrySize1 ? 1 : (EntrySize2 ? 
2 : 4); - if (EntrySize1 || EntrySize2) - MFI->setJumpTableEntryInfo(JTIdx, EntrySize, JTS, Signed); - - return false; -} - -bool NMCompressJumpTables::runOnMachineFunction(MachineFunction &Fn) { - STI = &static_cast(Fn.getSubtarget()); - TII = STI->getInstrInfo(); - bool Modified = false; - MF = &Fn; - - scanFunction(); - - for (MachineBasicBlock &MBB : *MF) { - int Offset = BlockInfo[MBB.getNumber()]; - for (MachineInstr &MI : MBB) { - Modified |= compressJumpTable(MI, Offset); - Offset += TII->getInstSizeInBytes(MI); - } - } - return Modified; -} - -namespace llvm { -FunctionPass *createNanoMipsCompressJumpTablesPass() { - return new NMCompressJumpTables(); -} -} // namespace llvm diff --git a/llvm/lib/Target/Mips/NanoMipsOptimizeJumpTables.cpp b/llvm/lib/Target/Mips/NanoMipsOptimizeJumpTables.cpp new file mode 100644 index 00000000000000..3d0e04688fcd9a --- /dev/null +++ b/llvm/lib/Target/Mips/NanoMipsOptimizeJumpTables.cpp @@ -0,0 +1,267 @@ +//===- NanoMipsOptimizeJumpTables.cpp - nanoMIPS optimize JTs --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains an optimization pass related to jump tables. Some +/// jump tables may not have any effect on the address of the following branch, +/// making them redundant. This optimization pass identifies such tables and +/// removes any associated code. It also compresses the Jump Table entries, +/// whenever possible. In the past, jump table entries used to be fixed size +/// (4B) and represented absolute addresses. However, we can now compress those +/// entries by filling them with specific offsets. This helps save at least 2B +/// per entry. The optimization pass checks if one or two bytes are sufficient +/// for the offset value. 
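+///
+/// For illustration (hypothetical numbers): if every target block lies within
+/// [-128, 127] bytes of the dispatching branch, signed 1-byte entries are
+/// emitted; a spread such as [0, 40000] bytes still fits unsigned 2-byte
+/// entries; anything larger keeps the original 4-byte entries. Note that
+/// -nmips-force-16-bit-jump-table (on by default) pins the width to 2 bytes.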
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/BranchFolding.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+
+#include <limits>
+
+using namespace llvm;
+
+#define NM_OPTIMIZE_JUMP_TABLES_OPT_NAME \
+  "nanoMIPS jump table optimization pass"
+
+static cl::opt<bool> NMForce16BitJumpTables(
+    "nmips-force-16-bit-jump-table", cl::Hidden, cl::init(true),
+    cl::desc("Force all jump tables to have 16bit wide entries"));
+
+namespace {
+struct NMOptimizeJumpTables : public MachineFunctionPass {
+  static char ID;
+  const MipsSubtarget *STI;
+  const TargetInstrInfo *TII;
+  MachineFunction *MF;
+  SmallVector<int, 8> BlockInfo;
+  SmallVector<int, 8> BrOffsets;
+
+  int computeBlockSize(MachineBasicBlock &MBB);
+  void scanFunction();
+  bool compressJumpTable(MachineInstr &MI);
+  bool optimizeRedundantEntries(MachineBasicBlock::iterator &I);
+
+  NMOptimizeJumpTables() : MachineFunctionPass(ID) {
+    initializeNMOptimizeJumpTablesPass(*PassRegistry::getPassRegistry());
+  }
+  StringRef getPassName() const override {
+    return NM_OPTIMIZE_JUMP_TABLES_OPT_NAME;
+  }
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+INITIALIZE_PASS_BEGIN(NMOptimizeJumpTables, "nm-optimize-jt",
+                      NM_OPTIMIZE_JUMP_TABLES_OPT_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(NMOptimizeJumpTables, "nm-optimize-jt",
+                    NM_OPTIMIZE_JUMP_TABLES_OPT_NAME, false, false)
+
+char NMOptimizeJumpTables::ID = 0;
+
+// TODO: Currently, there is no existing LLVM interface which we can use to
+// tell the maximum potential size of a MachineInstr. Once we have it, this
+// should be enhanced.
+int NMOptimizeJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+  int Size = 0;
+  for (const MachineInstr &MI : MBB)
+    Size += TII->getInstSizeInBytes(MI);
+  return Size;
+}
+
+void NMOptimizeJumpTables::scanFunction() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+  BrOffsets.clear();
+  bool findBR = MF->getJumpTableInfo() &&
+                !MF->getJumpTableInfo()->getJumpTables().empty();
+  if (findBR)
+    BrOffsets.resize(MF->getJumpTableInfo()->getJumpTables().size());
+  int Offset = 0;
+  for (MachineBasicBlock &MBB : *MF) {
+    BlockInfo[MBB.getNumber()] = Offset;
+    Offset += computeBlockSize(MBB);
+    if (findBR)
+      for (auto &MI : MBB) {
+        if (MI.getOpcode() == Mips::BRSC_NM) {
+          int JTIdx = MI.getOperand(1).getIndex();
+          BrOffsets[JTIdx] = Offset;
+          break;
+        }
+      }
+  }
+}
+
+bool NMOptimizeJumpTables::optimizeRedundantEntries(
+    MachineBasicBlock::iterator &I) {
+  auto JTOp = (*I).getOperand(3);
+  int JTIdx = JTOp.getIndex();
+  auto &JTInfo = *MF->getJumpTableInfo();
+  const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+  llvm::SmallPtrSet<MachineBasicBlock *, 8> JTMBBS;
+
+  // Collect the distinct target MBBs of the jump table.
+  for (auto MBB : JT.MBBs)
+    JTMBBS.insert(MBB);
+
+  MachineBasicBlock *MBBToJumpTo = nullptr;
+  MachineBasicBlock *CurrBB = JTOp.getParent()->getParent();
+
+  for (auto *MBB : JTMBBS) {
+    auto I = MBB->getFirstNonDebugInstr();
+    while (I == MBB->end() || I->isUnconditionalBranch()) {
+      // Empty block. Forward.
+      MBB = *MBB->successors().begin();
+      I = MBB->getFirstNonDebugInstr();
+    }
+    if (!MBBToJumpTo)
+      MBBToJumpTo = MBB;
+    if (MBBToJumpTo != MBB)
+      return false;
+  }
+
+  // Not finding a block to jump to implies a jump table with no MBBs, which
+  // is an error at this point.
+  assert(MBBToJumpTo && "Empty Jump Table.");
+
+  // Optimize the JT and the corresponding instructions: replace the
+  // LoadJumpTableOffset/BRSC_NM dispatch with an unconditional jump to
+  // MBBToJumpTo, the only non-empty target block.
+  // Insert the new BC_NM at LoadJumpTableOffset and step I back onto it.
+  BuildMI(*CurrBB, I, JTOp.getParent()->getDebugLoc(), TII->get(Mips::BC_NM))
+      .addMBB(MBBToJumpTo);
+  --I;
+  SmallVector<MachineInstr *, 4> InstrsToDelete;
+
+  // We want to delete all unnecessary instructions after removing
+  // LoadJumpTableOffset.
+  for (auto &I : *CurrBB) {
+    for (llvm::MachineInstr::mop_iterator OpI = I.operands_begin(),
+                                          OpEnd = I.operands_end();
+         OpI != OpEnd; ++OpI) {
+      llvm::MachineOperand &Operand = *OpI;
+      if (Operand.isIdenticalTo(JTOp))
+        InstrsToDelete.push_back(&I);
+    }
+  }
+
+  for (auto Instr : InstrsToDelete)
+    Instr->removeFromParent();
+  return true;
+}
+
+bool NMOptimizeJumpTables::compressJumpTable(MachineInstr &MI) {
+  int JTIdx = MI.getOperand(3).getIndex();
+  auto &JTInfo = *MF->getJumpTableInfo();
+  const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+  // The jump-table might have been optimized away.
+  if (JT.MBBs.empty())
+    return false;
+
+  int MaxOffset = std::numeric_limits<int>::min(),
+      MinOffset = std::numeric_limits<int>::max();
+  int BrOffset = BrOffsets[JTIdx];
+
+  bool Signed = false;
+  for (auto Block : JT.MBBs) {
+    int BlockOffset = BlockInfo[Block->getNumber()];
+    MaxOffset = std::max(MaxOffset, BlockOffset - BrOffset);
+    MinOffset = std::min(MinOffset, BlockOffset - BrOffset);
+  }
+  if (MinOffset < 0)
+    Signed = true;
+
+  if (std::max(std::abs(MinOffset), MaxOffset) == std::abs(MinOffset))
+    MaxOffset = MinOffset;
+
+  auto MFI = MF->getInfo<MipsFunctionInfo>();
+  MCSymbol *JTS = MFI->getJumpTableSymbol(JTIdx);
+
+  if (NMForce16BitJumpTables) {
+    MFI->setJumpTableEntryInfo(JTIdx, 2, JTS, Signed);
+    return false;
+  }
+
+  bool EntrySize1 =
+      (Signed && isInt<8>(MaxOffset)) || (!Signed && isUInt<8>(MaxOffset));
+  bool EntrySize2 =
+      (Signed && isInt<16>(MaxOffset)) || (!Signed && isUInt<16>(MaxOffset));
+  int EntrySize = EntrySize1 ? 1 : (EntrySize2 ? 2 : 4);
+  if (EntrySize1 || EntrySize2)
+    MFI->setJumpTableEntryInfo(JTIdx, EntrySize, JTS, Signed);
+
+  return false;
+}
+
+bool NMOptimizeJumpTables::runOnMachineFunction(MachineFunction &Fn) {
+  STI = &static_cast<const MipsSubtarget &>(Fn.getSubtarget());
+  TII = STI->getInstrInfo();
+  bool Modified = false;
+  MF = &Fn;
+
+  scanFunction();
+
+  bool CleanUpNeeded = false;
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
+         ++I) {
+      MachineInstr &MI = *I;
+      if (MI.getOpcode() != Mips::LoadJumpTableOffset)
+        continue;
+      bool OptimizedJT = optimizeRedundantEntries(I);
+      CleanUpNeeded |= OptimizedJT;
+      Modified |= OptimizedJT;
+      if (!OptimizedJT)
+        Modified |= compressJumpTable(MI);
+    }
+  }
+  if (CleanUpNeeded) {
+    MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
+    const MachineBranchProbabilityInfo *MBPI =
+        &getAnalysis<MachineBranchProbabilityInfo>();
+    ProfileSummaryInfo *PSI =
+        &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    auto MLI = &getAnalysis<MachineLoopInfo>();
+
+    BranchFolder BF(true, false, MBFI, *MBPI, PSI);
+    // We need a cleanup here, even though the pass runs after
+    // MachineBlockPlacement.
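+    // Replacing the table dispatch with a direct BC_NM typically leaves
+    // unreachable forwarding blocks and branches to fall-through successors
+    // behind; BranchFolder removes those dead blocks and simplifies the
+    // remaining control flow.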
+ BF.OptimizeFunction(*MF, TII, MF->getSubtarget().getRegisterInfo(), MLI, + /*AfterPlacement=*/false); + } + return Modified; +} + +namespace llvm { +FunctionPass *createNanoMipsOptimizeJumpTablesPass() { + return new NMOptimizeJumpTables(); +} +} // namespace llvm diff --git a/llvm/test/CodeGen/Mips/nanomips/redundant_jump_table.mir b/llvm/test/CodeGen/Mips/nanomips/redundant_jump_table.mir new file mode 100644 index 00000000000000..0b07e1319848ef --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/redundant_jump_table.mir @@ -0,0 +1,43 @@ +# RUN: llc -mtriple=nanomips -O2 -run-pass=nm-optimize-jt \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: jumptab +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.2', '%bb.3', '%bb.3', '%bb.3' ] +# CHECK: jumpTable: +# CHECK-NEXT: kind: block-address +# CHECK-NEXT: entries: +# CHECK-NEXT: - id: 0 +# CHECK-NEXT: blocks: [ ] +# CHECK-NOT: LA_NM +# CHECK-NOT: LoadJumpTableOffset +# CHECK-NOT: BRSC_NM +# CHECK: bb.1: +# CHECK: renamable $a0_nm = Li_NM 2 +# CHECK: bb.2: +# CHECK-NEXT: renamable $a0_nm = LWs9_NM $sp_nm, 2 +# CHECK-NEXT: RetRA implicit $a0_nm +body: | + bb.0: + successors: %bb.4(0x19999998), %bb.1(0x66666668) + BGEIUC_NM renamable $a0_nm, 4, %bb.4 + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + renamable $a1_nm = LA_NM %jump-table.0 + renamable $a0_nm = LoadJumpTableOffset killed renamable $a1_nm, renamable $a0_nm, %jump-table.0 + BRSC_NM renamable $a0_nm, %jump-table.0 + bb.2: + successors: %bb.3(0x80000000) + BC_NM %bb.3 + bb.3: + successors: %bb.4(0x80000000) + renamable $a0_nm = Li_NM 2 + BC_NM %bb.4 + bb.4: + renamable $a0_nm = LWs9_NM $sp_nm, 2 + RetRA implicit $a0_nm +...