// Represents the result of induction variable analysis. See
// FlowGraphNaturalLoop::AnalyzeIteration.
struct NaturalLoopIterInfo
{
    // The local that is the induction variable.
    unsigned IterVar = BAD_VAR_NUM;

    // Constant value that the induction variable is initialized with, outside
    // the loop. Only valid if HasConstInit is true.
    int ConstInitValue = 0;

    // Block outside the loop that initializes the induction variable. Only
    // valid if HasConstInit is true.
    BasicBlock* InitBlock = nullptr;

    // Tree that has the loop test for the induction variable.
    GenTree* TestTree = nullptr;

    // Tree that mutates the induction variable.
    GenTree* IterTree = nullptr;

    // Whether or not we found an initialization of the induction variable.
    bool HasConstInit : 1;

    // Whether or not the loop test compares the induction variable with a
    // constant value.
    bool HasConstLimit : 1;

    // Whether or not the loop test constant value is a SIMD vector element count.
    bool HasSimdLimit : 1;

    // Whether or not the loop test compares the induction variable with an
    // invariant local.
    bool HasInvariantLocalLimit : 1;

    // Whether or not the loop test compares the induction variable with the
    // length of an invariant array.
    bool HasArrayLengthLimit : 1;

    NaturalLoopIterInfo()
        : HasConstInit(false)
        , HasConstLimit(false)
        , HasSimdLimit(false)
        , HasInvariantLocalLimit(false)
        , HasArrayLengthLimit(false)
    {
    }

    int IterConst();
    genTreeOps IterOper();
    var_types IterOperType();
    bool IsReversed();
    genTreeOps TestOper();
    bool IsIncreasingLoop();
    bool IsDecreasingLoop();
    GenTree* Iterator();
    GenTree* Limit();
    int ConstLimit();
    unsigned VarLimit();
    bool ArrLenLimit(Compiler* comp, ArrIndex* index);
};
unsigned m_blocksSize = 0; + + // Edges from blocks inside the loop back to the header. jitstd::vector m_backEdges; + + // Edges from blocks outside the loop to the header. jitstd::vector m_entryEdges; + + // Edges from inside the loop to outside the loop. Note that exceptional + // flow can also exit the loop and is not modelled. jitstd::vector m_exitEdges; + + // Index of the loop in the range [0..FlowGraphNaturalLoops::NumLoops()). + // Can be used to store additional annotations for this loop on the side. unsigned m_index = 0; FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* head); @@ -2018,6 +2122,14 @@ class FlowGraphNaturalLoop bool TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex); BitVecTraits LoopBlockTraits(); + + template + bool VisitDefs(TFunc func); + + GenTreeLclVarCommon* FindDef(unsigned lclNum); + + void MatchInit(NaturalLoopIterInfo* info, BasicBlock* initBlock, GenTree* init); + bool MatchLimit(NaturalLoopIterInfo* info, GenTree* test); public: BasicBlock* GetHeader() const { @@ -2056,6 +2168,8 @@ class FlowGraphNaturalLoop bool ContainsBlock(BasicBlock* block); + unsigned NumLoopBlocks(); + template BasicBlockVisit VisitLoopBlocksReversePostOrder(TFunc func); @@ -2064,12 +2178,34 @@ class FlowGraphNaturalLoop template BasicBlockVisit VisitLoopBlocks(TFunc func); + + template + BasicBlockVisit VisitLoopBlocksLexical(TFunc func); + + BasicBlock* GetLexicallyTopMostBlock(); + BasicBlock* GetLexicallyBottomMostBlock(); + + bool AnalyzeIteration(NaturalLoopIterInfo* info); + + bool HasDef(unsigned lclNum); }; +// Represents a collection of the natural loops in the flow graph. See +// FlowGraphNaturalLoop for the characteristics of these loops. +// +// Loops are stored in a vector, with easily accessible indices (see +// FlowGraphNaturalLoop::GetIndex()). These indices can be used to store +// additional annotations for each loop on the side. 
+// class FlowGraphNaturalLoops { const FlowGraphDfsTree* m_dfs; + + // Collection of loops that were found. jitstd::vector m_loops; + + // Whether or not we saw any non-natural loop cycles, also known as + // irreducible loops. unsigned m_improperLoopHeaders = 0; FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs); @@ -2148,6 +2284,7 @@ class FlowGraphNaturalLoops static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs); }; +// Represents the dominator tree of the flow graph. class FlowGraphDominatorTree { template @@ -4713,6 +4850,10 @@ class Compiler unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder FlowGraphDfsTree* m_dfs; + FlowGraphNaturalLoops* m_loops; + struct LoopDsc; + LoopDsc** m_newToOldLoop; + FlowGraphNaturalLoop** m_oldToNewLoop; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). 
fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -6558,7 +6699,7 @@ class Compiler void optFindLoops(); PhaseStatus optCloneLoops(); - void optCloneLoop(unsigned loopInd, LoopCloneContext* context); + void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context); PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info) void optRemoveRedundantZeroInits(); PhaseStatus optIfConversion(); // If conversion @@ -6870,7 +7011,7 @@ class Compiler bool optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar); bool optPopulateInitInfo(unsigned loopInd, BasicBlock* initBlock, GenTree* init, unsigned iterVar); bool optExtractInitTestIncr( - BasicBlock** pInitBlock, BasicBlock* bottom, BasicBlock* exit, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr); + BasicBlock** pInitBlock, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr); void optFindNaturalLoops(); @@ -7989,34 +8130,34 @@ class Compiler public: struct LoopCloneVisitorInfo { - LoopCloneContext* context; - Statement* stmt; - const unsigned loopNum; - const bool cloneForArrayBounds; - const bool cloneForGDVTests; - LoopCloneVisitorInfo(LoopCloneContext* context, - unsigned loopNum, - Statement* stmt, - bool cloneForArrayBounds, - bool cloneForGDVTests) + LoopCloneContext* context; + Statement* stmt; + FlowGraphNaturalLoop* loop; + const bool cloneForArrayBounds; + const bool cloneForGDVTests; + LoopCloneVisitorInfo(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + Statement* stmt, + bool cloneForArrayBounds, + bool cloneForGDVTests) : context(context) , stmt(nullptr) - , loopNum(loopNum) + , loop(loop) , cloneForArrayBounds(cloneForArrayBounds) , cloneForGDVTests(cloneForGDVTests) { } }; - bool optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum); + bool optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum); bool optExtractArrIndex(GenTree* tree, 
ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndexHelp(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndex(GenTree* tree, ArrIndex* result); - bool optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context); + bool optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context); static fgWalkPreFn optCanOptimizeByLoopCloningVisitor; fgWalkResult optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info); bool optObtainLoopCloningOpts(LoopCloneContext* context); - bool optIsLoopClonable(unsigned loopInd); + bool optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context); bool optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info); bool optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType); @@ -8025,13 +8166,13 @@ class Compiler #ifdef DEBUG void optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore); #endif - void optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool fastPath)); - bool optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context); - bool optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context); - BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, - unsigned loopNum, - BasicBlock* slowHead, - BasicBlock* insertAfter); + void optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, LoopCloneContext* context DEBUGARG(bool fastPath)); + bool optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + bool optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context); + BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + BasicBlock* slowHead, + BasicBlock* insertAfter); protected: ssize_t optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk)); diff --git 
a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 5492577d3b77e7..640fee9c0ef970 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5015,6 +5015,47 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocks(TFunc func) return VisitLoopBlocksReversePostOrder(func); } +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocksLexical: Visit the loop's blocks in +// lexical order. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func) +{ + BasicBlock* top = m_header; + BasicBlock* bottom = m_header; + VisitLoopBlocks([&](BasicBlock* block) { + if (block->bbNum < top->bbNum) + top = block; + if (block->bbNum > bottom->bbNum) + bottom = block; + return BasicBlockVisit::Continue; + }); + + BasicBlock* block = top; + while (true) + { + if (ContainsBlock(block) && (func(block) == BasicBlockVisit::Abort)) + return BasicBlockVisit::Abort; + + if (block == bottom) + return BasicBlockVisit::Continue; + + block = block->Next(); + } +} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 8efb82a2d91309..1971048b7e85bf 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -65,7 +65,8 @@ void Compiler::fgInit() fgBBVarSetsInited = false; fgReturnCount = 0; - m_dfs = nullptr; + m_dfs = nullptr; + m_loops = nullptr; // Initialize BlockSet data. 
fgCurBBEpoch = 0; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 2aa8ce36d0c131..f38ba2f7dd535e 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4197,8 +4197,7 @@ BitVecTraits FlowGraphNaturalLoop::LoopBlockTraits() // Remarks: // Containment here means that the block is in the SCC of the loop; i.e. it // is in a cycle with the header block. Note that EH successors are taken -// into acount; for example, a BBJ_RETURN may still be a loop block provided -// that its handler can reach the loop header. +// into account. // bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) { @@ -4212,6 +4211,18 @@ bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) return BitVecOps::IsMember(&traits, m_blocks, index); } +//------------------------------------------------------------------------ +// NumLoopBlocks: Get the number of blocks in the SCC of the loop. +// +// Returns: +// Count of blocks. +// +unsigned FlowGraphNaturalLoop::NumLoopBlocks() +{ + BitVecTraits loopTraits = LoopBlockTraits(); + return BitVecOps::Count(&loopTraits, m_blocks); +} + //------------------------------------------------------------------------ // FlowGraphNaturalLoops::FlowGraphNaturalLoops: Initialize a new instance to // track a set of loops over the flow graph. @@ -4544,6 +4555,647 @@ bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, ji return true; } +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitDefs: Visit all definitions contained in the +// loop. +// +// Type parameters: +// TFunc - Callback functor type +// +// Parameters: +// func - Callback functor that accepts a GenTreeLclVarCommon* and returns a +// bool. On true, continue looking for defs; on false, abort. +// +// Returns: +// True if all defs were visited and the functor never returned false; otherwise false. 
//------------------------------------------------------------------------
// FlowGraphNaturalLoop::VisitDefs: Visit all definitions contained in the
// loop.
//
// Type parameters:
//   TFunc - Callback functor type
//
// Parameters:
//   func - Callback functor that accepts a GenTreeLclVarCommon* and returns a
//   bool. On true, continue looking for defs; on false, abort.
//
// Returns:
//   True if all defs were visited and the functor never returned false; otherwise false.
//
template <typename TFunc>
bool FlowGraphNaturalLoop::VisitDefs(TFunc func)
{
    class VisitDefsVisitor : public GenTreeVisitor<VisitDefsVisitor>
    {
        using GenTreeVisitor<VisitDefsVisitor>::m_compiler;

        // The user callback; invoked for every local def found.
        TFunc& m_func;

    public:
        enum
        {
            DoPreOrder = true,
        };

        VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor<VisitDefsVisitor>(comp), m_func(func)
        {
        }

        Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
        {
            GenTree* tree = *use;
            // No assignment anywhere below this node; prune the subtree.
            if ((tree->gtFlags & GTF_ASG) == 0)
            {
                return Compiler::WALK_SKIP_SUBTREES;
            }

            GenTreeLclVarCommon* lclDef;
            if (tree->DefinesLocal(m_compiler, &lclDef))
            {
                // Callback returning false means "stop visiting defs".
                if (!m_func(lclDef))
                    return Compiler::WALK_ABORT;
            }

            return Compiler::WALK_CONTINUE;
        }
    };

    VisitDefsVisitor visitor(m_tree->GetCompiler(), func);

    // Walk every statement of every loop block, aborting as soon as the
    // callback asks us to.
    BasicBlockVisit result = VisitLoopBlocks([&](BasicBlock* loopBlock) {
        for (Statement* stmt : loopBlock->Statements())
        {
            if (visitor.WalkTree(stmt->GetRootNodePointer(), nullptr) == Compiler::WALK_ABORT)
                return BasicBlockVisit::Abort;
        }

        return BasicBlockVisit::Continue;
    });

    return result == BasicBlockVisit::Continue;
}

//------------------------------------------------------------------------
// FlowGraphNaturalLoop::FindDef: Find a def of the specified local number.
//
// Parameters:
//   lclNum - The local.
//
// Returns:
//   Tree that represents a def of the local; nullptr if no def was found.
//
// Remarks:
//   Does not take promotion into account; only defs of exactly lclNum match.
//
GenTreeLclVarCommon* FlowGraphNaturalLoop::FindDef(unsigned lclNum)
{
    GenTreeLclVarCommon* result = nullptr;
    VisitDefs([&result, lclNum](GenTreeLclVarCommon* def) {
        if (def->GetLclNum() == lclNum)
        {
            // Found a def; record it and stop visiting.
            result = def;
            return false;
        }

        return true;
    });

    return result;
}
//------------------------------------------------------------------------
// FlowGraphNaturalLoop::AnalyzeIteration: Analyze the induction structure of
// the loop.
//
// Parameters:
//   info - [out] Loop information
//
// Returns:
//   True if the structure was analyzed and we can make guarantees about it;
//   otherwise false.
//
// Remarks:
//   The function guarantees that at all definitions of the iteration local,
//   the loop condition is reestablished before iteration continues. In other
//   words, if this function returns true the loop invariant is guaranteed to
//   be upheld in all blocks in the loop (but see below for establishing the
//   base case).
//
//   Currently we do not validate that there is a zero-trip test ensuring the
//   condition is true, so it is up to the user to validate that. This is
//   normally done via GTF_RELOP_ZTT set by loop inversion.
//
bool FlowGraphNaturalLoop::AnalyzeIteration(NaturalLoopIterInfo* info)
{
    JITDUMP("Analyzing iteration for " FMT_LP " with header " FMT_BB "\n", m_index, m_header->bbNum);

    const FlowGraphDfsTree* dfs  = m_tree;
    Compiler*               comp = dfs->GetCompiler();
    // Analysis requires a canonicalized loop with a single entry edge.
    assert((m_entryEdges.size() == 1) && "Expected preheader");

    BasicBlock* preheader = m_entryEdges[0]->getSourceBlock();

    JITDUMP(" Preheader = " FMT_BB "\n", preheader->bbNum);

    // TODO-Quirk: For backwards compatibility always try the lexically
    // bottom-most block for the loop variable.
    BasicBlock* bottom = GetLexicallyBottomMostBlock();
    JITDUMP(" Bottom = " FMT_BB "\n", bottom->bbNum);

    BasicBlock* cond      = bottom;
    BasicBlock* initBlock = preheader;
    GenTree*    init;
    GenTree*    test;
    if (!cond->KindIs(BBJ_COND) ||
        !comp->optExtractInitTestIncr(&initBlock, bottom, m_header, &init, &test, &info->IterTree))
    {
        // TODO-CQ: Try all exit edges here to see if we can find an induction variable.
        JITDUMP(" Could not extract induction variable from bottom\n");
        return false;
    }

    info->IterVar = comp->optIsLoopIncrTree(info->IterTree);

    assert(info->IterVar != BAD_VAR_NUM);
    LclVarDsc* const iterVarDsc = comp->lvaGetDesc(info->IterVar);

    // Bail on promoted case, otherwise we'd have to search the loop
    // for both iterVar and its parent.
    // TODO-CQ: Fix this
    //
    if (iterVarDsc->lvIsStructField)
    {
        JITDUMP(" iterVar V%02u is a promoted field\n", info->IterVar);
        return false;
    }

    // Bail on the potentially aliased case.
    //
    if (iterVarDsc->IsAddressExposed())
    {
        JITDUMP(" iterVar V%02u is address exposed\n", info->IterVar);
        return false;
    }

    if (init == nullptr)
    {
        // NOTE(review): the "<none>" placeholder appears to have been lost in
        // this patch's transport; restored here — confirm against upstream.
        JITDUMP(" Init = <none>, test = [%06u], incr = [%06u]\n", Compiler::dspTreeID(test),
                Compiler::dspTreeID(info->IterTree));
    }
    else
    {
        JITDUMP(" Init = [%06u], test = [%06u], incr = [%06u]\n", Compiler::dspTreeID(init), Compiler::dspTreeID(test),
                Compiler::dspTreeID(info->IterTree));
    }

    // The test must be analyzable for the loop invariant to mean anything.
    if (!MatchLimit(info, test))
    {
        return false;
    }

    // The init is optional; MatchInit only fills in HasConstInit on success.
    MatchInit(info, initBlock, init);

    // Reject the loop if the induction variable has any def other than the
    // recognized increment tree; such a def could break the invariant.
    bool result = VisitDefs([=](GenTreeLclVarCommon* def) {
        if ((def->GetLclNum() != info->IterVar) || (def == info->IterTree))
            return true;

        JITDUMP(" Loop has extraneous def [%06u]\n", Compiler::dspTreeID(def));
        return false;
    });

    if (!result)
    {
        return false;
    }

#ifdef DEBUG
    if (comp->verbose)
    {
        printf(" IterVar = V%02u\n", info->IterVar);

        if (info->HasConstInit)
            printf(" Const init with value %d in " FMT_BB "\n", info->ConstInitValue, info->InitBlock->bbNum);

        printf(" Test is [%06u] (", Compiler::dspTreeID(info->TestTree));
        if (info->HasConstLimit)
            printf("const limit ");
        if (info->HasSimdLimit)
            printf("simd limit ");
        if (info->HasInvariantLocalLimit)
            printf("invariant local limit ");
        if (info->HasArrayLengthLimit)
            printf("array length limit ");
        printf(")\n");
    }
#endif

    return result;
}
+//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::MatchInit: Try to pattern match the initialization of +// an induction variable. +// +// Parameters: +// info - [in, out] Info structure to query and fill out +// initBlock - Block containing the initialization tree +// init - Initialization tree +// +// Remarks: +// We do not necessarily guarantee or require to be able to find any +// initialization. +// +void FlowGraphNaturalLoop::MatchInit(NaturalLoopIterInfo* info, BasicBlock* initBlock, GenTree* init) +{ + if ((init == nullptr) || !init->OperIs(GT_STORE_LCL_VAR) || (init->AsLclVarCommon()->GetLclNum() != info->IterVar)) + return; + + GenTree* initValue = init->AsLclVar()->Data(); + if (!initValue->IsCnsIntOrI() || !initValue->TypeIs(TYP_INT)) + return; + + info->HasConstInit = true; + info->ConstInitValue = (int)initValue->AsIntCon()->IconValue(); + info->InitBlock = initBlock; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::MatchLimit: Try to pattern match the loop test of an +// induction variable. +// +// Parameters: +// info - [in, out] Info structure to query and fill out +// test - Loop condition test +// +// Returns: +// True if the loop condition was recognized and "info" was filled out. +// +// Remarks: +// Unlike the initialization, we do require that we are able to match the +// loop condition. +// +bool FlowGraphNaturalLoop::MatchLimit(NaturalLoopIterInfo* info, GenTree* test) +{ + // Obtain the relop from the "test" tree. + GenTree* relop; + if (test->OperIs(GT_JTRUE)) + { + relop = test->gtGetOp1(); + } + else + { + assert(test->OperIs(GT_STORE_LCL_VAR)); + relop = test->AsLclVar()->Data(); + } + + noway_assert(relop->OperIsCompare()); + + GenTree* opr1 = relop->AsOp()->gtOp1; + GenTree* opr2 = relop->AsOp()->gtOp2; + + GenTree* iterOp; + GenTree* limitOp; + + // Make sure op1 or op2 is the iterVar. 
+ if (opr1->gtOper == GT_LCL_VAR && opr1->AsLclVarCommon()->GetLclNum() == info->IterVar) + { + iterOp = opr1; + limitOp = opr2; + } + else if (opr2->gtOper == GT_LCL_VAR && opr2->AsLclVarCommon()->GetLclNum() == info->IterVar) + { + iterOp = opr2; + limitOp = opr1; + } + else + { + return false; + } + + if (iterOp->gtType != TYP_INT) + { + return false; + } + + // Mark the iterator node. + iterOp->gtFlags |= GTF_VAR_ITERATOR; + + // Check what type of limit we have - constant, variable or arr-len. + if (limitOp->gtOper == GT_CNS_INT) + { + info->HasConstLimit = true; + if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0) + { + info->HasSimdLimit = true; + } + } + else if (limitOp->OperIs(GT_LCL_VAR)) + { + // See if limit var is a loop invariant + // + GenTreeLclVarCommon* def = FindDef(limitOp->AsLclVarCommon()->GetLclNum()); + if (def != nullptr) + { + JITDUMP(" Limit var V%02u modified by [%06u]\n", limitOp->AsLclVarCommon()->GetLclNum(), + Compiler::dspTreeID(def)); + return false; + } + + info->HasInvariantLocalLimit = true; + } + else if (limitOp->OperIs(GT_ARR_LENGTH)) + { + // See if limit array is a loop invariant + // + GenTree* const array = limitOp->AsArrLen()->ArrRef(); + + if (!array->OperIs(GT_LCL_VAR)) + { + JITDUMP(" Array limit tree [%06u] not analyzable\n", Compiler::dspTreeID(limitOp)); + return false; + } + + GenTreeLclVarCommon* def = FindDef(array->AsLclVar()->GetLclNum()); + if (def != nullptr) + { + JITDUMP(" Array limit var V%02u modified by [%06u]\n", array->AsLclVarCommon()->GetLclNum(), + Compiler::dspTreeID(def)); + return false; + } + + info->HasArrayLengthLimit = true; + } + else + { + JITDUMP(" Loop limit tree [%06u] not analyzable\n", Compiler::dspTreeID(limitOp)); + return false; + } + + // Were we able to successfully analyze the limit? 
+ // + assert(info->HasConstLimit || info->HasInvariantLocalLimit || info->HasArrayLengthLimit); + + info->TestTree = relop; + return true; +} + +//------------------------------------------------------------------------ +// GetLexicallyTopMostBlock: Get the lexically top-most block contained within +// the loop. +// +// Returns: +// Block with highest bbNum. +// +// Remarks: +// Mostly exists as a quirk while transitioning from the old loop +// representation to the new one. +// +BasicBlock* FlowGraphNaturalLoop::GetLexicallyTopMostBlock() +{ + BasicBlock* top = m_header; + VisitLoopBlocks([&top](BasicBlock* loopBlock) { + if (loopBlock->bbNum < top->bbNum) + top = loopBlock; + return BasicBlockVisit::Continue; + }); + + return top; +} + +//------------------------------------------------------------------------ +// GetLexicallyBottomMostBlock: Get the lexically bottom-most block contained +// within the loop. +// +// Returns: +// Block with highest bbNum. +// +// Remarks: +// Mostly exists as a quirk while transitioning from the old loop +// representation to the new one. +// +BasicBlock* FlowGraphNaturalLoop::GetLexicallyBottomMostBlock() +{ + BasicBlock* bottom = m_header; + VisitLoopBlocks([&bottom](BasicBlock* loopBlock) { + if (loopBlock->bbNum > bottom->bbNum) + bottom = loopBlock; + return BasicBlockVisit::Continue; + }); + + return bottom; +} + +//------------------------------------------------------------------------ +// HasDef: Check if a local is defined anywhere in the loop. +// +// Parameters: +// lclNum - Local to check for a def for. +// +// Returns: +// True if the local has any def. +// +bool FlowGraphNaturalLoop::HasDef(unsigned lclNum) +{ + Compiler* comp = m_tree->GetCompiler(); + LclVarDsc* dsc = comp->lvaGetDesc(lclNum); + + assert(!comp->lvaVarAddrExposed(lclNum)); + // Currently does not handle promoted locals, only fields. 
+ assert(!dsc->lvPromoted); + + unsigned defLclNum1 = lclNum; + unsigned defLclNum2 = BAD_VAR_NUM; + if (dsc->lvIsStructField) + { + defLclNum2 = dsc->lvParentLcl; + } + + bool result = VisitDefs([=](GenTreeLclVarCommon* lcl) { + if ((lcl->GetLclNum() == defLclNum1) || (lcl->GetLclNum() == defLclNum2)) + { + return false; + } + + return true; + }); + + // If we stopped early we found a def. + return !result; +} + +//------------------------------------------------------------------------ +// IterConst: Get the constant with which the iterator is modified +// +// Returns: +// Constant value. +// +int NaturalLoopIterInfo::IterConst() +{ + GenTree* value = IterTree->AsLclVar()->Data(); + return (int)value->gtGetOp2()->AsIntCon()->IconValue(); +} + +//------------------------------------------------------------------------ +// IterOper: Get the type of the operation on the iterator +// +// Returns: +// Oper +// +genTreeOps NaturalLoopIterInfo::IterOper() +{ + return IterTree->AsLclVar()->Data()->OperGet(); +} + +//------------------------------------------------------------------------ +// IterOperType: Get the type of the operation on the iterator. +// +// Returns: +// Type, used for overflow instructions. +// +var_types NaturalLoopIterInfo::IterOperType() +{ + assert(genActualType(IterTree) == TYP_INT); + return IterTree->TypeGet(); +} + +//------------------------------------------------------------------------ +// IsReversed: Returns true if the iterator node is the second operand in the +// loop condition. +// +// Returns: +// True if so. +// +bool NaturalLoopIterInfo::IsReversed() +{ + return TestTree->gtGetOp2()->OperIs(GT_LCL_VAR) && ((TestTree->gtGetOp2()->gtFlags & GTF_VAR_ITERATOR) != 0); +} + +//------------------------------------------------------------------------ +// TestOper: The type of the comparison between the iterator and the limit +// (GT_LE, GT_GE, etc.) +// +// Returns: +// Oper. 
//------------------------------------------------------------------------
// TestOper: The type of the comparison between the iterator and the limit
// (GT_LE, GT_GE, etc.)
//
// Returns:
//   Oper, normalized as if the iterator were the first operand.
//
genTreeOps NaturalLoopIterInfo::TestOper()
{
    genTreeOps op = TestTree->OperGet();
    return IsReversed() ? GenTree::SwapRelop(op) : op;
}

//------------------------------------------------------------------------
// IsIncreasingLoop: Returns true if the loop iterator increases from low to
// high value.
//
// Returns:
//   True if so.
//
bool NaturalLoopIterInfo::IsIncreasingLoop()
{
    // Increasing loop is the one that has "+=" increment operation and "< or <=" limit check.
    bool isLessThanLimitCheck = GenTree::StaticOperIs(TestOper(), GT_LT, GT_LE);
    return (isLessThanLimitCheck &&
            (((IterOper() == GT_ADD) && (IterConst() > 0)) || ((IterOper() == GT_SUB) && (IterConst() < 0))));
}

//------------------------------------------------------------------------
// IsDecreasingLoop: Returns true if the loop iterator decreases from high to
// low value.
//
// Returns:
//   True if so.
//
bool NaturalLoopIterInfo::IsDecreasingLoop()
{
    // Decreasing loop is the one that has "-=" decrement operation and "> or >=" limit check. If the operation is
    // "+=", make sure the constant is negative to give an effect of decrementing the iterator.
    bool isGreaterThanLimitCheck = GenTree::StaticOperIs(TestOper(), GT_GT, GT_GE);
    return (isGreaterThanLimitCheck &&
            (((IterOper() == GT_ADD) && (IterConst() < 0)) || ((IterOper() == GT_SUB) && (IterConst() > 0))));
}

//------------------------------------------------------------------------
// Iterator: Get the iterator node in the loop test
//
// Returns:
//   Iterator node.
//
GenTree* NaturalLoopIterInfo::Iterator()
{
    return IsReversed() ? TestTree->gtGetOp2() : TestTree->gtGetOp1();
}

//------------------------------------------------------------------------
// Limit: Get the limit node in the loop test.
//
// Returns:
//   Limit node.
//
GenTree* NaturalLoopIterInfo::Limit()
{
    return IsReversed() ? TestTree->gtGetOp1() : TestTree->gtGetOp2();
}

//------------------------------------------------------------------------
// ConstLimit: Get the constant value of the iterator limit, i.e. when the loop
// condition is "i RELOP const".
//
// Returns:
//   Limit constant.
//
// Remarks:
//   Only valid if HasConstLimit is true.
//
int NaturalLoopIterInfo::ConstLimit()
{
    assert(HasConstLimit);
    GenTree* limit = Limit();
    assert(limit->OperIsConst());
    return (int)limit->AsIntCon()->gtIconVal;
}

//------------------------------------------------------------------------
// VarLimit: Get the local var num used in the loop condition, i.e. when the
// loop condition is "i RELOP lclVar" with a loop invariant local.
//
// Returns:
//   Local var number.
//
// Remarks:
//   Only valid if HasInvariantLocalLimit is true.
//
unsigned NaturalLoopIterInfo::VarLimit()
{
    assert(HasInvariantLocalLimit);

    GenTree* limit = Limit();
    assert(limit->OperGet() == GT_LCL_VAR);
    return limit->AsLclVarCommon()->GetLclNum();
}

//------------------------------------------------------------------------
// ArrLenLimit: Get the array length used in the loop condition, i.e. when the
// loop condition is "i RELOP arr.len".
//
// Parameters:
//   comp  - Compiler instance
//   index - [out] Array index information
//
// Returns:
//   True if the array length was extracted.
//
// Remarks:
//   Only valid if HasArrayLengthLimit is true.
//
bool NaturalLoopIterInfo::ArrLenLimit(Compiler* comp, ArrIndex* index)
{
    assert(HasArrayLengthLimit);

    GenTree* limit = Limit();
    assert(limit->OperIs(GT_ARR_LENGTH));

    // Check if we have a.length or a[i][j].length
    if (limit->AsArrLen()->ArrRef()->OperIs(GT_LCL_VAR))
    {
        index->arrLcl = limit->AsArrLen()->ArrRef()->AsLclVarCommon()->GetLclNum();
        index->rank   = 0;
        return true;
    }
    // We have a[i].length, extract a[i] pattern.
    else if (limit->AsArrLen()->ArrRef()->OperIs(GT_COMMA))
    {
        return comp->optReconstructArrIndex(limit->AsArrLen()->ArrRef(), index);
    }
    return false;
}

//--------------------------------------------------------------------------------------------------
// GetLoopIterInfo: Get the analyzed loop iteration info for a loop.
//
// Arguments:
//   loopNum - Index of loop, as returned by FlowGraphNaturalLoop::GetIndex().
//
// Returns:
//   The info, or nullptr if the loop iteration structure could not be
//   analyzed.
//
NaturalLoopIterInfo* LoopCloneContext::GetLoopIterInfo(unsigned loopNum)
{
    return iterInfo[loopNum];
}

//--------------------------------------------------------------------------------------------------
// SetLoopIterInfo: Set the analyzed loop iteration info for a loop.
//
// Arguments:
//   loopNum - Index of loop, as returned by FlowGraphNaturalLoop::GetIndex().
//   info    - Info to store
//
void LoopCloneContext::SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info)
{
    iterInfo[loopNum] = info;
}
@@ -1068,8 +1096,8 @@ LC_ArrayDeref* LC_ArrayDeref::Find(JitExpandArrayStack* children // optDeriveLoopCloningConditions: Derive loop cloning conditions. // // Arguments: -// loopNum - the current loop index for which conditions are derived. -// context - data structure where all loop cloning info is kept. +// loop - the current loop for which conditions are derived. +// context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. @@ -1088,13 +1116,12 @@ LC_ArrayDeref* LC_ArrayDeref::Find(JitExpandArrayStack* children // Callers should assume AND operation is used i.e., if all conditions are // true, then take the fast path. // -bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { JITDUMP("------------------------------------------------------------\n"); - JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loopNum); + JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loop->GetIndex()); - LoopDsc* loop = &optLoopTable[loopNum]; - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos->Size() > 0); // We only need to check for iteration behavior if we have array checks. 
@@ -1117,8 +1144,8 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Ident objDeref = LC_Ident::CreateIndirOfLocal(ttInfo->lclNum, 0); LC_Ident methodTable = LC_Ident::CreateClassHandle(ttInfo->clsHnd); LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methodTable)); - context->EnsureObjDerefs(loopNum)->Push(objDeref); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureObjDerefs(loop->GetIndex())->Push(objDeref); + context->EnsureConditions(loop->GetIndex())->Push(cond); break; } @@ -1139,8 +1166,8 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methAddr)); - context->EnsureObjDerefs(loopNum)->Push(objDeref); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureObjDerefs(loop->GetIndex())->Push(objDeref); + context->EnsureConditions(loop->GetIndex())->Push(cond); break; } @@ -1155,27 +1182,28 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // No array conditions here, so we're done // JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); return true; } + NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); // Note we see cases where the test oper is NE (array.Len) which we could handle // with some extra care. 
// - if (!GenTree::StaticOperIs(loop->lpTestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) + if (!GenTree::StaticOperIs(iterInfo->TestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) { // We can't reason about how this loop iterates return false; } - const bool isIncreasingLoop = loop->lpIsIncreasingLoop(); - assert(isIncreasingLoop || loop->lpIsDecreasingLoop()); + const bool isIncreasingLoop = iterInfo->IsIncreasingLoop(); + assert(isIncreasingLoop || iterInfo->IsDecreasingLoop()); // We already know that this is either increasing or decreasing loop and the // stride is (> 0) or (< 0). Here, just take the abs() value and check if it // is beyond the limit. - int stride = abs(loop->lpIterConst()); + int stride = abs(iterInfo->IterConst()); if (stride >= 58) { @@ -1189,27 +1217,27 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Ident ident; // Init conditions - if (loop->lpFlags & LPFLG_CONST_INIT) + if (iterInfo->HasConstInit) { // Only allowing non-negative const init at this time. // This is because the variable initialized with this constant will be used as an array index, // and array indices must be non-negative. 
- if (loop->lpConstInit < 0) + if (iterInfo->ConstInitValue < 0) { - JITDUMP("> Init %d is invalid\n", loop->lpConstInit); + JITDUMP("> Init %d is invalid\n", iterInfo->ConstInitValue); return false; } if (!isIncreasingLoop) { // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident::CreateConst(static_cast(loop->lpConstInit)); + ident = LC_Ident::CreateConst(static_cast(iterInfo->ConstInitValue)); } } else { // iterVar >= 0 - const unsigned initLcl = loop->lpIterVar(); + const unsigned initLcl = iterInfo->IterVar; if (!genActualTypeIsInt(lvaGetDesc(initLcl))) { JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl); @@ -1227,13 +1255,13 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext ident = LC_Ident::CreateVar(initLcl); geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident::CreateConst(0u))); } - context->EnsureConditions(loopNum)->Push(geZero); + context->EnsureConditions(loop->GetIndex())->Push(geZero); } // Limit Conditions - if (loop->lpFlags & LPFLG_CONST_LIMIT) + if (iterInfo->HasConstLimit) { - int limit = loop->lpConstLimit(); + int limit = iterInfo->ConstLimit(); if (limit < 0) { JITDUMP("> limit %d is invalid\n", limit); @@ -1246,9 +1274,9 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext ident = LC_Ident::CreateConst(static_cast(limit)); } } - else if (loop->lpFlags & LPFLG_VAR_LIMIT) + else if (iterInfo->HasInvariantLocalLimit) { - const unsigned limitLcl = loop->lpVarLimit(); + const unsigned limitLcl = iterInfo->VarLimit(); if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) { JITDUMP("> Limit var V%02u not compatible with TYP_INT\n", limitLcl); @@ -1267,12 +1295,12 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident::CreateVar(limitLcl)), LC_Expr(LC_Ident::CreateConst(0u))); } - context->EnsureConditions(loopNum)->Push(geZero); + 
context->EnsureConditions(loop->GetIndex())->Push(geZero); } - else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + else if (iterInfo->HasArrayLengthLimit) { ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); - if (!loop->lpArrLenLimit(this, index)) + if (!iterInfo->ArrLenLimit(this, index)) { JITDUMP("> ArrLen not matching\n"); return false; @@ -1281,7 +1309,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // Ensure that this array must be dereference-able, before executing the actual condition. LC_Array array(LC_Array::Jagged, index, LC_Array::None); - context->EnsureArrayDerefs(loopNum)->Push(array); + context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } else { @@ -1297,7 +1325,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // GT_GT loop test: (end > start) ==> (end <= arrLen) // GT_GE loop test: (end >= start) ==> (end < arrLen) genTreeOps opLimitCondition; - switch (loop->lpTestOper()) + switch (iterInfo->TestOper()) { case GT_LT: case GT_GT: @@ -1322,11 +1350,11 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureConditions(loop->GetIndex())->Push(cond); // Ensure that this array must be dereference-able, before executing the actual condition. 
LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); - context->EnsureArrayDerefs(loopNum)->Push(array); + context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } break; case LcOptInfo::LcMdArray: @@ -1336,7 +1364,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext mdArrInfo->dim, LC_Array::None)); LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); + context->EnsureConditions(loop->GetIndex())->Push(cond); // TODO: ensure array is dereference-able? } @@ -1352,7 +1380,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext } JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); return true; @@ -1362,8 +1390,8 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays. // // Arguments: -// loopNum - the current loop index for which conditions are derived. -// context - data structure where all loop cloning info is kept. +// loop - the current loop for which conditions are derived. +// context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. @@ -1463,11 +1491,11 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // // and so on. // -bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { // Get the dereference-able arrays and objects. 
- JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loopNum); - JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loopNum); + JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loop->GetIndex()); + JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loop->GetIndex()); // We currently expect to have at least one of these. // @@ -1558,7 +1586,7 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] JitExpandArrayStack*>* levelCond = - context->EnsureBlockConditions(loopNum, condBlocks); + context->EnsureBlockConditions(loop->GetIndex(), condBlocks); for (unsigned i = 0; i < arrayDerefNodes.Size(); ++i) { arrayDerefNodes[i]->DeriveLevelConditions(levelCond); @@ -1567,7 +1595,8 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con if (objDeref->Size() > 0) { - JitExpandArrayStack*>* levelCond = context->EnsureBlockConditions(loopNum, 1); + JitExpandArrayStack*>* levelCond = + context->EnsureBlockConditions(loop->GetIndex(), 1); for (unsigned i = 0; i < objDeref->Size(); ++i) { @@ -1579,7 +1608,7 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con } } - DBEXEC(verbose, context->PrintBlockConditions(loopNum)); + DBEXEC(verbose, context->PrintBlockConditions(loop->GetIndex())); return true; } @@ -1610,11 +1639,11 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore // candidates gathered during the cloning phase. // // Arguments: -// loopNum - the current loop index for which the optimizations are performed. -// context - data structure where all loop cloning info is kept. -// dynamicPath - If true, the optimization is performed in the fast path among the -// cloned loops. If false, it means this is the only path (i.e., -// there is no slow path.) 
+// loop - the current loop for which the optimizations are performed. +// context - data structure where all loop cloning info is kept. +// dynamicPath - If true, the optimization is performed in the fast path among the +// cloned loops. If false, it means this is the only path (i.e., +// there is no slow path.) // // Operation: // Perform the optimizations on the fast path i.e., the path in which the @@ -1628,9 +1657,10 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore // performs the optimizations assuming that the path in which the candidates // were collected is the fast path in which the optimizations will be performed. // -void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) +void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, + LoopCloneContext* context DEBUGARG(bool dynamicPath)) { - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos != nullptr); for (unsigned i = 0; i < optInfos->Size(); ++i) { @@ -1738,7 +1768,7 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* // optIsLoopClonable: Determine whether this loop can be cloned. // // Arguments: -// loopInd loop index which needs to be checked if it can be cloned. +// loop - the loop which needs to be checked if it can be cloned. // // Return Value: // Returns true if the loop can be cloned. If it returns false, @@ -1748,20 +1778,14 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* // return blocks in the loop that will be cloned. (REVIEW: this 'predicate' function // doesn't seem like the right place to do this change.)
// -bool Compiler::optIsLoopClonable(unsigned loopInd) +bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - const LoopDsc& loop = optLoopTable[loopInd]; - const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); - - if (requireIterable && !(loop.lpFlags & LPFLG_ITER)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". No LPFLG_ITER flag.\n", loopInd); - return false; - } + const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); + NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); - if (loop.lpIsRemoved()) + if (requireIterable && (iterInfo == nullptr)) { - JITDUMP("Loop cloning: rejecting removed loop " FMT_LP ".\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Could not analyze iteration.\n", loop->GetIndex()); return false; } @@ -1774,34 +1798,39 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // TODO: this limitation could be removed if we do the work to insert new EH regions in the exception table, // for the cloned loop (and its embedded EH regions). // - // Also, count the number of return blocks within the loop for future use. - unsigned loopRetCount = 0; - for (BasicBlock* const blk : loop.LoopBlocks()) - { - if (blk->KindIs(BBJ_RETURN)) - { - loopRetCount++; - } + BasicBlockVisit result = loop->VisitLoopBlocks([=](BasicBlock* blk) { + assert(!blk->KindIs(BBJ_RETURN)); if (bbIsTryBeg(blk)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has a `try` begin.\n", loopInd); - return false; + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has a `try` begin.\n", loop->GetIndex()); + return BasicBlockVisit::Abort; } + + return BasicBlockVisit::Continue; + }); + + if (result == BasicBlockVisit::Abort) + { + return false; } // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump // into the middle of a handler (to go to the cloned copy.) Reject. 
- if (bbIsHandlerBeg(loop.lpEntry)) + if (bbIsHandlerBeg(loop->GetHeader())) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Entry block is a handler start.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Header block is a handler start.\n", loop->GetIndex()); return false; } + // Loop canonicalization should have ensured that there is a unique preheader. + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + // If the head and entry are in different EH regions, reject. - if (!BasicBlock::sameEHRegion(loop.lpHead, loop.lpEntry)) + if (!BasicBlock::sameEHRegion(preheader, loop->GetHeader())) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Head and entry blocks are in different EH regions.\n", - loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Preheader and header blocks are in different EH regions.\n", + loop->GetIndex()); return false; } @@ -1811,29 +1840,29 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // that block; this is one of those cases. This could be fixed fairly easily; for example, // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the // loop. This is just a corner to cut to get this working faster. - BasicBlock* bbAfterLoop = loop.lpBottom->Next(); + // TODO-Quirk: Should rework this to avoid the lexically bottom most block here. + BasicBlock* bottom = loop->GetLexicallyBottomMostBlock(); + + BasicBlock* bbAfterLoop = bottom->Next(); if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Next block after bottom is a handler start.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Next block after bottom is a handler start.\n", + loop->GetIndex()); return false; } - // Reject cloning if this is a mid-entry loop and the entry has non-loop predecessors other than its head. 
- // This loop may be part of a larger looping construct that we didn't recognize. - // - // We should really fix this in optCanonicalizeLoop. + // TODO-Quirk: Reject loops that with the old loop cloning would put us + // above max number of returns. Return blocks are not considered part of + // loops in the new loop finding since they are never part of the SCC (and + // they aren't present inside try-regions). // - if (!loop.lpIsTopEntry()) + LoopDsc* oldLoop = m_newToOldLoop[loop->GetIndex()]; + unsigned loopRetCount = 0; + for (BasicBlock* const blk : oldLoop->LoopBlocks()) { - for (BasicBlock* const entryPred : loop.lpEntry->PredBlocks()) + if (blk->KindIs(BBJ_RETURN)) { - if ((entryPred != loop.lpHead) && !loop.lpContains(entryPred)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Is not top entry, and entry has multiple non-loop preds.\n", - loopInd); - return false; - } + loopRetCount++; } } @@ -1849,79 +1878,62 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". It has %d returns;" " if added to previously existing %d returns, it would exceed the limit of %d.\n", - loopInd, loopRetCount, fgReturnCount, epilogLimit); + loop->GetIndex(), loopRetCount, fgReturnCount, epilogLimit); return false; } - if (requireIterable) - { - const unsigned ivLclNum = loop.lpIterVar(); - if (lvaVarAddrExposed(ivLclNum)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Rejected V%02u as iter var because is address-exposed.\n", - loopInd, ivLclNum); - return false; - } - } + assert(!requireIterable || !lvaVarAddrExposed(iterInfo->IterVar)); - BasicBlock* top = loop.lpTop; - BasicBlock* bottom = loop.lpBottom; + // TODO-Quirk: These conditions are unnecessary. + BasicBlock* oldLoopTop = oldLoop->lpTop; + BasicBlock* oldLoopBottom = oldLoop->lpBottom; - if (!bottom->KindIs(BBJ_COND)) + if (!oldLoopBottom->KindIs(BBJ_COND)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". 
Couldn't find termination test.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Couldn't find termination test.\n", loop->GetIndex()); return false; } - if (!bottom->HasJumpTo(top)) + if (!oldLoopBottom->HasJumpTo(oldLoopTop)) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Branch at loop 'bottom' not looping to 'top'.\n", loopInd); + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Branch at loop 'bottom' not looping to 'top'.\n", + loop->GetIndex()); return false; } if (requireIterable) { - if ((loop.lpFlags & LPFLG_CONST_LIMIT) == 0 && (loop.lpFlags & LPFLG_VAR_LIMIT) == 0 && - (loop.lpFlags & LPFLG_ARRLEN_LIMIT) == 0) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Loop limit is neither constant, variable or array length.\n", - loopInd); - return false; - } + assert(iterInfo->HasConstLimit || iterInfo->HasInvariantLocalLimit || iterInfo->HasArrayLengthLimit); // TODO-CQ: Handle other loops like: // - The ones whose limit operator is "==" or "!=" // - The incrementing operator is multiple and divide // - The ones that are inverted are not handled here for cases like "i *= 2" because // they are converted to "i + i". - if (!(loop.lpIsIncreasingLoop() || loop.lpIsDecreasingLoop())) + if (!iterInfo->IsIncreasingLoop() && !iterInfo->IsDecreasingLoop()) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", - loopInd, GenTree::OpName(loop.lpTestOper()), GenTree::OpName(loop.lpIterOper())); + loop->GetIndex(), GenTree::OpName(iterInfo->TestOper()), GenTree::OpName(iterInfo->IterOper())); return false; } - if (!loop.lpTestTree->OperIsCompare() || !(loop.lpTestTree->gtFlags & GTF_RELOP_ZTT)) + if (!iterInfo->TestTree->OperIsCompare() || ((iterInfo->TestTree->gtFlags & GTF_RELOP_ZTT) == 0)) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". 
Loop inversion NOT present, loop test [%06u] may not protect " "entry from head.\n", - loopInd, loop.lpTestTree->gtTreeID); + loop->GetIndex(), iterInfo->TestTree->gtTreeID); return false; } #ifdef DEBUG - const unsigned ivLclNum = loop.lpIterVar(); - GenTree* const op1 = loop.lpIterator(); + const unsigned ivLclNum = iterInfo->IterVar; + GenTree* const op1 = iterInfo->Iterator(); assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); #endif } - // Otherwise, we're going to add those return blocks. - fgReturnCount += loopRetCount; - return true; } @@ -1929,10 +1941,10 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // optInsertLoopChoiceConditions: Insert the loop conditions for a loop after the loop head. // // Arguments: -// context loop cloning context variable -// loopNum the loop index -// slowHead the slow path loop head, where the condition failures branch -// insertAfter insert the conditions after this block +// context - loop cloning context variable +// loop - the loop +// slowHead - the slow path loop head, where the condition failures branch +// insertAfter - insert the conditions after this block // // Return Value: // The last condition block added. @@ -1949,33 +1961,34 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) // ... 
// slowHead -?> e2 (slowHead) branch or fall-through to e2 // -BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, - unsigned loopNum, - BasicBlock* slowHead, - BasicBlock* insertAfter) +BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, + FlowGraphNaturalLoop* loop, + BasicBlock* slowHead, + BasicBlock* insertAfter) { - JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loopNum); - assert(context->HasBlockConditions(loopNum)); + JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex()); + assert(context->HasBlockConditions(loop->GetIndex())); assert(slowHead != nullptr); assert(insertAfter->KindIs(BBJ_ALWAYS)); assert(insertAfter->JumpsToNext()); - if (context->HasBlockConditions(loopNum)) + if (context->HasBlockConditions(loop->GetIndex())) { - JitExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); + JitExpandArrayStack*>* levelCond = + context->GetBlockConditions(loop->GetIndex()); for (unsigned i = 0; i < levelCond->Size(); ++i) { - JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loopNum, i); + JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loop->GetIndex(), i); DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowHead, insertAfter); } } // Finally insert cloning conditions after all deref conditions have been inserted. 
- JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loopNum); - DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP("Adding loop " FMT_LP " cloning conditions\n ", loop->GetIndex()); + DBEXEC(verbose, context->PrintConditions(loop->GetIndex())); JITDUMP("\n"); - insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), slowHead, insertAfter); + insertAfter = context->CondToStmtInBlock(this, *(context->GetConditions(loop->GetIndex())), slowHead, insertAfter); return insertAfter; } @@ -1984,21 +1997,25 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, // optCloneLoop: Perform the mechanical cloning of the specified loop // // Arguments: -// loopInd - loop index of loop to clone +// loop - The loop to clone // context - data structure where all loop cloning info is kept. // -void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) +void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - assert(loopInd < optLoopCount); - - LoopDsc& loop = optLoopTable[loopInd]; +#ifdef DEBUG + if (verbose) + { + printf("\nCloning loop " FMT_LP " with blocks:\n", loop->GetIndex()); - JITDUMP("\nCloning loop " FMT_LP ": [head: " FMT_BB ", top: " FMT_BB ", entry: " FMT_BB ", bottom: " FMT_BB - ", child: " FMT_LP "].\n", - loopInd, loop.lpHead->bbNum, loop.lpTop->bbNum, loop.lpEntry->bbNum, loop.lpBottom->bbNum, loop.lpChild); + loop->VisitLoopBlocksReversePostOrder([](BasicBlock* block) { + printf(" " FMT_BB "\n", block->bbNum); + return BasicBlockVisit::Continue; + }); + } +#endif // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). 
- unsigned depth = optLoopDepth(loopInd); + unsigned depth = optLoopDepth((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable)); weight_t ambientWeight = 1; for (unsigned j = 0; j < depth; j++) { @@ -2007,9 +2024,11 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) assert(ambientWeight > lastWeight); } - // If we're in a non-natural loop, the ambient weight might be higher than we computed above. - // Be safe by taking the max with the head block's weight. - ambientWeight = max(ambientWeight, loop.lpHead->bbWeight); + assert(loop->EntryEdges().size() == 1); + BasicBlock* preheader = loop->EntryEdges()[0]->getSourceBlock(); + // The ambient weight might be higher than we computed above. Be safe by + // taking the max with the head block's weight. + ambientWeight = max(ambientWeight, preheader->bbWeight); // We assume that the fast path will run 99% of the time, and thus should get 99% of the block weights. // The slow path will, correspondingly, get only 1% of the block weights. It could be argued that we should @@ -2020,8 +2039,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // This is the containing loop, if any -- to label any blocks we create that are outside // the loop being cloned. - unsigned char ambientLoop = loop.lpParent; + unsigned char ambientLoop = m_newToOldLoop[loop->GetIndex()]->lpParent; + BasicBlock* h = preheader; // We're going to transform this loop: // // H --> E (if T == E, H falls through to T/E) @@ -2044,13 +2064,12 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // B2 ?-> T2 // X - BasicBlock* h = loop.lpHead; assert((h->bbFlags & BBF_LOOP_PREHEADER) != 0); // Make a new pre-header block 'h2' for the loop. 'h' will fall through to 'h2'. 
JITDUMP("Create new header block for loop\n"); - BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true, /*jumpDest*/ loop.lpEntry); + BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true, /*jumpDest*/ loop->GetHeader()); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", h2->bbNum, h->bbNum); h2->bbWeight = h2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; h2->bbNatLoopNum = ambientLoop; @@ -2062,15 +2081,15 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) } assert(h->KindIs(BBJ_ALWAYS)); - assert(h->HasJumpTo(loop.lpEntry)); + assert(h->HasJumpTo(loop->GetHeader())); - fgReplacePred(loop.lpEntry, h, h2); - JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop.lpEntry->bbNum, - h2->bbNum, loop.lpEntry->bbNum); + fgReplacePred(loop->GetHeader(), h, h2); + JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop->GetHeader()->bbNum, + h2->bbNum, loop->GetHeader()->bbNum); // 'h' is no longer the loop head; 'h2' is! h->bbFlags &= ~BBF_LOOP_PREHEADER; - optUpdateLoopHead(loopInd, h, h2); + optUpdateLoopHead((unsigned)(m_newToOldLoop[loop->GetIndex()] - optLoopTable), h, h2); // Make 'h' fall through to 'h2' (if it didn't already). // Don't add the h->h2 edge because we're going to insert the cloning conditions between 'h' and 'h2', and @@ -2081,7 +2100,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Make X2 after B, if necessary. (Not necessary if B is a BBJ_ALWAYS.) // "newPred" will be the predecessor of the blocks of the cloned loop. - BasicBlock* b = loop.lpBottom; + BasicBlock* b = loop->GetLexicallyBottomMostBlock(); BasicBlock* newPred = b; if (!b->KindIs(BBJ_ALWAYS)) { @@ -2127,9 +2146,10 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. 
- BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - for (BasicBlock* const blk : loop.LoopBlocks()) - { + BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); + unsigned numBlocksLeft = loop->NumLoopBlocks(); + + loop->VisitLoopBlocksLexical([=, &newPred, &numBlocksLeft](BasicBlock* blk) { // Initialize newBlk as BBJ_ALWAYS without jump target, and fix up jump target later with optCopyBlkDest() BasicBlock* newBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); @@ -2173,19 +2193,58 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding // loop, if one exists -- the parent of the loop we're cloning. - newBlk->bbNatLoopNum = loop.lpParent; + newBlk->bbNatLoopNum = ambientLoop; newPred = newBlk; blockMap->Set(blk, newBlk); - } + + numBlocksLeft--; + assert(numBlocksLeft >= 0); + + // If the block falls through to a block outside the loop then we may + // need to insert a new block to redirect. + if ((numBlocksLeft > 0) && blk->bbFallsThrough() && !loop->ContainsBlock(blk->Next())) + { + if (blk->KindIs(BBJ_COND)) + { + // TODO-Quirk: We see a lot of these cases and some of them cause diffs. + BasicBlock* targetBlk = blk->Next(); + if (targetBlk->KindIs(BBJ_ALWAYS) && targetBlk->isEmpty()) + targetBlk = targetBlk->GetJumpDest(); + + // Need to insert a block. 
+ BasicBlock* newRedirBlk = fgNewBBafter(BBJ_ALWAYS, newPred, /* extendRegion */ true, targetBlk); + newRedirBlk->copyEHRegion(newPred); + newRedirBlk->bbNatLoopNum = ambientLoop; + newRedirBlk->bbWeight = blk->Next()->bbWeight; + newRedirBlk->bbFlags |= blk->Next()->bbFlags & (BBF_RUN_RARELY | BBF_PROF_WEIGHT); + newRedirBlk->scaleBBWeight(slowPathWeightScaleFactor); + + // TODO-Quirk: This next block is not part of the loop and + // should not be scaled down, especially once we get rid of the + // other quirk above. + blk->Next()->scaleBBWeight(fastPathWeightScaleFactor); + + // This block isn't part of the loop, so below loop won't add + // refs for it. + fgAddRefPred(targetBlk, newRedirBlk); + newPred = newRedirBlk; + } + else + { + assert(!"Cannot handle fallthrough"); + } + } + + return BasicBlockVisit::Continue; + }); // Perform the static optimizations on the fast path. - optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); + optPerformStaticOptimizations(loop, context DEBUGARG(true)); // Now go through the new blocks, remapping their jump targets within the loop // and updating the preds lists. - for (BasicBlock* const blk : loop.LoopBlocks()) - { + loop->VisitLoopBlocks([=](BasicBlock* blk) { BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2222,13 +2281,14 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) default: break; } - } + + return BasicBlockVisit::Continue; + }); #ifdef DEBUG // Display the preds for the new blocks, after all the new blocks have been redirected. 
JITDUMP("Preds after loop copy:\n"); - for (BasicBlock* const blk : loop.LoopBlocks()) - { + loop->VisitLoopBlocksReversePostOrder([=](BasicBlock* blk) { BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); @@ -2238,7 +2298,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) JITDUMP(" " FMT_BB, predBlock->bbNum); } JITDUMP("\n"); - } + + return BasicBlockVisit::Continue; + }); #endif // DEBUG // Insert the loop choice conditions. We will create the following structure: @@ -2254,13 +2316,13 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // // We should always have block conditions. - assert(context->HasBlockConditions(loopInd)); + assert(context->HasBlockConditions(loop->GetIndex())); assert(h->KindIs(BBJ_ALWAYS)); assert(h->HasJumpTo(h2)); // If any condition is false, go to slowHead (which branches or falls through to e2). BasicBlock* e2 = nullptr; - bool foundIt = blockMap->Lookup(loop.lpEntry, &e2); + bool foundIt = blockMap->Lookup(loop->GetHeader(), &e2); assert(foundIt && e2 != nullptr); // We haven't set the jump target yet @@ -2271,7 +2333,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) fgAddRefPred(e2, slowHead); JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", slowHead->bbNum, e2->bbNum); - BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, slowHead, h); + BasicBlock* condLast = optInsertLoopChoiceConditions(context, loop, slowHead, h); // Add the fall-through path pred (either to T/E for fall-through from conditions to fast path, // or H2 if branch to E of fast path). @@ -2283,29 +2345,32 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // initialize the loop counter immediately before entering the loop, but we've left a shared // initialization of the loop counter up above the test that determines which version of the // loop to take. 
- loop.lpFlags |= LPFLG_DONT_UNROLL; + m_newToOldLoop[loop->GetIndex()]->lpFlags |= LPFLG_DONT_UNROLL; } //------------------------------------------------------------------------- // optIsStackLocalInvariant: Is stack local invariant in loop. // // Arguments: -// loopNum The loop in which the variable is tested for invariance. -// lclNum The local that is tested for invariance in the loop. +// loop - The loop in which the variable is tested for invariance. +// lclNum - The local that is tested for invariance in the loop. // // Return Value: // Returns true if the variable is loop invariant in loopNum. // -bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) +bool Compiler::optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum) { if (lvaVarAddrExposed(lclNum)) { return false; } - if (optIsVarAssgLoop(loopNum, lclNum)) + + // TODO: Cache this invariance information + if (loop->HasDef(lclNum)) { return false; } + return true; } @@ -2649,22 +2714,24 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop #endif // Check that the array object local variable is invariant within the loop body. - if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) + if (!optIsStackLocalInvariant(info->loop, arrIndex.arrLcl)) { JITDUMP("V%02d is not loop invariant\n", arrIndex.arrLcl); return WALK_SKIP_SUBTREES; } + NaturalLoopIterInfo* iterInfo = info->context->GetLoopIterInfo(info->loop->GetIndex()); + // Walk the dimensions and see if iterVar of the loop is used as index. for (unsigned dim = 0; dim < arrIndex.rank; ++dim) { // Is index variable also used as the loop iter var? - if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) + if (arrIndex.indLcls[dim] == iterInfo->IterVar) { // Check the previous indices are all loop invariant. 
for (unsigned dim2 = 0; dim2 < dim; ++dim2) { - if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) + if (!optIsStackLocalInvariant(info->loop, arrIndex.indLcls[dim2])) { JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]); return WALK_SKIP_SUBTREES; @@ -2673,19 +2740,18 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop #ifdef DEBUG if (verbose) { - printf("Loop " FMT_LP " can be cloned for ArrIndex ", info->loopNum); + printf("Loop " FMT_LP " can be cloned for ArrIndex ", info->loop->GetIndex()); arrIndex.Print(); printf(" on dim %d\n", dim); } #endif // Update the loop context. - info->context->EnsureLoopOptInfo(info->loopNum) + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); } else { - JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), - dim); + JITDUMP("Induction V%02d is not used as index on dim %d\n", iterInfo->IterVar, dim); } } return WALK_SKIP_SUBTREES; @@ -2747,7 +2813,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop JITDUMP("... right form for type test with local V%02u\n", lclNum); - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + if (!optIsStackLocalInvariant(info->loop, lclNum)) { JITDUMP("... but not invariant\n"); return WALK_CONTINUE; @@ -2755,8 +2821,8 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop // Looks like we found an invariant type test. 
// - JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loopNum, dspTreeID(tree), - lclNum); + JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loop->GetIndex(), + dspTreeID(tree), lclNum); if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) { @@ -2766,7 +2832,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); assert(compCurBB->lastStmt() == info->stmt); - info->context->EnsureLoopOptInfo(info->loopNum) + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(info->stmt, indir, lclNum, clsHnd)); } } @@ -2830,13 +2896,13 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_CONTINUE; } - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + if (!optIsStackLocalInvariant(info->loop, lclNum)) { JITDUMP("... but not invariant\n"); return WALK_CONTINUE; } - JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loopNum, + JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loop->GetIndex(), dspTreeID(tree), lclNum); if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) @@ -2851,7 +2917,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop LcMethodAddrTestOptInfo(info->stmt, indir, lclNum, (void*)iconHandle->IconValue(), relopOp2 != iconHandle DEBUG_ARG( (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle)); - info->context->EnsureLoopOptInfo(info->loopNum)->Push(optInfo); + info->context->EnsureLoopOptInfo(info->loop->GetIndex())->Push(optInfo); } } } @@ -2896,16 +2962,15 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // (3) the test is frequently hit during the loop iteration // (4) the test is biased and highly likely to succeed // - const LoopDsc& 
loopDsc = optLoopTable[info->loopNum]; - BasicBlock* const loopEntry = loopDsc.lpEntry; - BasicBlock* const typeTestBlock = compCurBB; - double const loopFrequency = 0.50; - double const typeTestFrequency = 0.50; - double const typeTestBias = 0.05; + FlowGraphNaturalLoop* loop = info->loop; + BasicBlock* const typeTestBlock = compCurBB; + double const loopFrequency = 0.50; + double const typeTestFrequency = 0.50; + double const typeTestBias = 0.05; // Check for (1) // - if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) + if (!loop->GetHeader()->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) { JITDUMP(" No; loop does not have profile data.\n"); return WALK_CONTINUE; @@ -2913,7 +2978,7 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // Check for (2) // - if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) + if (loop->GetHeader()->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) { JITDUMP(" No; loop does not iterate often enough.\n"); return WALK_CONTINUE; @@ -2921,7 +2986,7 @@ bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneV // Check for (3) // - if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + if (typeTestBlock->bbWeight < (typeTestFrequency * loop->GetHeader()->bbWeight)) { JITDUMP(" No; guard does not execute often enough within the loop.\n"); return WALK_CONTINUE; @@ -2971,8 +3036,8 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // Also, check if the loop is suitable for the optimizations performed. // // Arguments: -// loopNum - the current loop index for which conditions are derived. -// context - data structure where all loop cloning candidates will be updated. +// loop - Loop being analyzed +// context - data structure where all loop cloning candidates will be updated. 
// // Return Value: // If the loop is not suitable for the optimizations, return false - context @@ -2984,16 +3049,15 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // optimization candidates and update the "context" parameter with all the // contextual information necessary to perform the optimization later. // -bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) +bool Compiler::optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context) { - const LoopDsc& loop = optLoopTable[loopNum]; - const bool canCloneForArrayBounds = - ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && ((loop.lpFlags & LPFLG_ITER) != 0); - const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); + NaturalLoopIterInfo* iterInfo = context->GetLoopIterInfo(loop->GetIndex()); + const bool canCloneForArrayBounds = ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && (iterInfo != nullptr); + const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); if (!canCloneForArrayBounds && !canCloneForTypeTests) { - JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loopNum); + JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loop->GetIndex()); return false; } @@ -3004,12 +3068,13 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex shouldCloneForGdvTests &= JitConfig.JitCloneLoopsWithGdvTests() != 0; #endif - JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loopNum, + JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loop->GetIndex(), shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForGdvTests ? 
" (GDV tests)" : ""); - LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); - for (BasicBlock* const block : loop.LoopBlocks()) - { + LoopCloneVisitorInfo info(context, loop, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); + + // TODO-Quirk: Switch this to VisitLoopBlocks + loop->VisitLoopBlocksLexical([=, &info](BasicBlock* block) { compCurBB = block; for (Statement* const stmt : block->Statements()) { @@ -3019,7 +3084,9 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex fgWalkTreePre(stmt->GetRootNodePointer(), optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly, computeStack); } - } + + return BasicBlockVisit::Continue; + }); return true; } @@ -3041,14 +3108,22 @@ bool Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) bool result = false; for (unsigned i = 0; i < optLoopCount; i++) { - JITDUMP("Considering loop " FMT_LP " to clone for optimizations.\n", i); - if (optIsLoopClonable(i)) + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + JITDUMP("Considering loop " FMT_LP " to clone for optimizations.\n", loop->GetIndex()); + NaturalLoopIterInfo iterInfo; + // TODO-Quirk: Remove + if ((optLoopTable[i].lpFlags & LPFLG_ITER) != 0) { - if (optIdentifyLoopOptInfo(i, context)) + if (loop->AnalyzeIteration(&iterInfo)) { - result = true; + context->SetLoopIterInfo(loop->GetIndex(), new (this, CMK_LoopClone) NaturalLoopIterInfo(iterInfo)); } } + + if (optIsLoopClonable(loop, context) && optIdentifyLoopOptInfo(loop, context)) + { + result = true; + } JITDUMP("------------------------------------------------------------\n"); } JITDUMP("\n"); @@ -3097,7 +3172,7 @@ PhaseStatus Compiler::optCloneLoops() return PhaseStatus::MODIFIED_NOTHING; } - LoopCloneContext context(optLoopCount, getAllocator(CMK_LoopClone)); + LoopCloneContext context((unsigned)m_loops->NumLoops(), getAllocator(CMK_LoopClone)); // Obtain array optimization candidates in the context. 
if (!optObtainLoopCloningOpts(&context)) @@ -3110,38 +3185,40 @@ PhaseStatus Compiler::optCloneLoops() unsigned optStaticallyOptimizedLoops = 0; // For each loop, derive cloning conditions for the optimization candidates. - for (unsigned i = 0; i < optLoopCount; ++i) + for (unsigned i = 0; i < optLoopCount; i++) { - JitExpandArrayStack* optInfos = context.GetLoopOptInfo(i); + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + + JitExpandArrayStack* optInfos = context.GetLoopOptInfo(loop->GetIndex()); if (optInfos == nullptr) { continue; } - if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) + if (!optDeriveLoopCloningConditions(loop, &context) || !optComputeDerefConditions(loop, &context)) { JITDUMP("> Conditions could not be obtained\n"); - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } else { bool allTrue = false; bool anyFalse = false; - context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); + context.EvaluateConditions(loop->GetIndex(), &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } else if (allTrue) { // Perform static optimizations on the fast path since we always // have to take the cloned path. - optPerformStaticOptimizations(i, &context DEBUGARG(false)); + optPerformStaticOptimizations(loop, &context DEBUGARG(false)); ++optStaticallyOptimizedLoops; // No need to clone. - context.CancelLoopOptInfo(i); + context.CancelLoopOptInfo(loop->GetIndex()); } } } @@ -3176,12 +3253,13 @@ PhaseStatus Compiler::optCloneLoops() assert(optLoopsCloned == 0); // It should be initialized, but not yet changed. 
for (unsigned i = 0; i < optLoopCount; ++i) { - if (context.GetLoopOptInfo(i) != nullptr) + FlowGraphNaturalLoop* loop = m_oldToNewLoop[i]; + if (context.GetLoopOptInfo(loop->GetIndex()) != nullptr) { optLoopsCloned++; - context.OptimizeConditions(i DEBUGARG(verbose)); - context.OptimizeBlockConditions(i DEBUGARG(verbose)); - optCloneLoop(i, &context); + context.OptimizeConditions(loop->GetIndex() DEBUGARG(verbose)); + context.OptimizeBlockConditions(loop->GetIndex() DEBUGARG(verbose)); + optCloneLoop(loop, &context); } } @@ -3190,6 +3268,9 @@ PhaseStatus Compiler::optCloneLoops() JITDUMP("Recompute reachability and dominators after loop cloning\n"); // TODO: recompute the loop table, to include the slow loop path in the table? fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_DOMS); + + m_dfs = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); } #ifdef DEBUG diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 81cd13354b11a0..9279bc783a9ecf 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -807,6 +807,7 @@ struct LC_ArrayDeref #endif }; +struct NaturalLoopIterInfo; /** * * The "context" represents data that is used for making loop-cloning decisions. @@ -841,16 +842,28 @@ struct LoopCloneContext // The array of block levels of conditions for each loop. 
(loop x level x conditions) jitstd::vector*>*> blockConditions; + jitstd::vector iterInfo; + LoopCloneContext(unsigned loopCount, CompAllocator alloc) - : alloc(alloc), optInfo(alloc), conditions(alloc), arrayDerefs(alloc), objDerefs(alloc), blockConditions(alloc) + : alloc(alloc) + , optInfo(alloc) + , conditions(alloc) + , arrayDerefs(alloc) + , objDerefs(alloc) + , blockConditions(alloc) + , iterInfo(alloc) { optInfo.resize(loopCount, nullptr); conditions.resize(loopCount, nullptr); arrayDerefs.resize(loopCount, nullptr); objDerefs.resize(loopCount, nullptr); blockConditions.resize(loopCount, nullptr); + iterInfo.resize(loopCount, nullptr); } + NaturalLoopIterInfo* GetLoopIterInfo(unsigned loopNum); + void SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info); + // Evaluate conditions into a JTRUE stmt and put it in a new block after `insertAfter`. BasicBlock* CondToStmtInBlock(Compiler* comp, JitExpandArrayStack& conds, diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 7c5692cba8a54f..fa98ae43eea77c 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5539,6 +5539,55 @@ void Compiler::optFindLoops() PhaseStatus Compiler::optFindLoopsPhase() { optFindLoops(); + + m_dfs = fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); + + m_newToOldLoop = m_loops->NumLoops() == 0 ? 
nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); + m_oldToNewLoop = new (this, CMK_Loops) FlowGraphNaturalLoop*[BasicBlock::MAX_LOOP_NUM]{}; + + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) + { + BasicBlock* head = loop->GetHeader(); + if (head->bbNatLoopNum == BasicBlock::NOT_IN_LOOP) + continue; + + LoopDsc* dsc = &optLoopTable[head->bbNatLoopNum]; + if (dsc->lpEntry != head) + continue; + + assert(m_oldToNewLoop[head->bbNatLoopNum] == nullptr); + assert(m_newToOldLoop[loop->GetIndex()] == nullptr); + m_oldToNewLoop[head->bbNatLoopNum] = loop; + m_newToOldLoop[loop->GetIndex()] = dsc; + } + +#ifdef DEBUG + for (unsigned i = 0; i < optLoopCount; i++) + { + assert(m_oldToNewLoop[i] != nullptr); + LoopDsc* dsc = &optLoopTable[i]; + if ((dsc->lpFlags & LPFLG_ITER) == 0) + continue; + + NaturalLoopIterInfo iter; + bool analyzed = m_oldToNewLoop[i]->AnalyzeIteration(&iter); + assert(analyzed); + + assert(iter.HasConstInit == ((dsc->lpFlags & LPFLG_CONST_INIT) != 0)); + assert(iter.HasConstLimit == ((dsc->lpFlags & LPFLG_CONST_LIMIT) != 0)); + assert(iter.HasSimdLimit == ((dsc->lpFlags & LPFLG_SIMD_LIMIT) != 0)); + assert(iter.HasInvariantLocalLimit == ((dsc->lpFlags & LPFLG_VAR_LIMIT) != 0)); + assert(iter.HasArrayLengthLimit == ((dsc->lpFlags & LPFLG_ARRLEN_LIMIT) != 0)); + if (iter.HasConstInit) + { + assert(iter.ConstInitValue == dsc->lpConstInit); + assert(iter.InitBlock == dsc->lpInitBlock); + } + assert(iter.TestTree == dsc->lpTestTree); + } +#endif + return PhaseStatus::MODIFIED_EVERYTHING; }