diff options
author | XChy <xxs_chy@outlook.com> | 2024-04-27 16:10:16 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-27 16:10:16 +0800 |
commit | c229f767e48c7190b7568e6ebd1688bb08795744 (patch) | |
tree | a287b5a59b954b3a8b426d0affa5b593102667a3 | |
parent | 715219482b99ceef9bf83a2ff68c64c8faa930cd (diff) |
[DFAJumpThreading] Avoid exploring the paths that never come back (#85505)
This patch does:
- Preserve loop info when unfolding selects.
- Reduce the search space for loop paths.
-rw-r--r-- | llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 42 |
1 files changed, 33 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 1caed93b1b66..14cdcc956aa5 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -131,7 +131,7 @@ public: explicit operator bool() const { return SI && SIUse; } }; -void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, +void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, std::vector<SelectInstToUnfold> *NewSIsToUnfold, std::vector<BasicBlock *> *NewBBs); @@ -142,6 +142,7 @@ public: : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} bool run(Function &F); + bool LoopInfoBroken; private: void @@ -157,7 +158,7 @@ private: std::vector<SelectInstToUnfold> NewSIsToUnfold; std::vector<BasicBlock *> NewBBs; - unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); + unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); // Put newly discovered select instructions into the work list. for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) @@ -201,7 +202,7 @@ void createBasicBlockAndSinkSelectInst( /// created basic blocks into \p NewBBs. /// /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. -void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, +void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, std::vector<SelectInstToUnfold> *NewSIsToUnfold, std::vector<BasicBlock *> *NewBBs) { SelectInst *SI = SIToUnfold.getInst(); @@ -307,6 +308,12 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, {DominatorTree::Insert, StartBlock, FT}}); + // Preserve loop info + if (Loop *L = LI->getLoopFor(SI->getParent())) { + for (BasicBlock *NewBB : *NewBBs) + L->addBasicBlockToLoop(NewBB, *LI); + } + // The select is now dead. assert(SI->use_empty() && "Select must be dead now"); SI->eraseFromParent(); @@ -522,9 +529,10 @@ private: }; struct AllSwitchPaths { - AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) - : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), - ORE(ORE) {} + AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE, + LoopInfo *LI) + : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE), + LI(LI) {} std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } unsigned getNumThreadingPaths() { return TPaths.size(); } @@ -596,6 +604,12 @@ private: Visited.insert(BB); + // Stop if we have reached the BB out of loop, since its successors have no + // impact on the DFA. + // TODO: Do we need to stop exploring if BB is the outer loop of the switch? + if (!LI->getLoopFor(BB)) + return Res; + // Some blocks have multiple edges to the same successor, and this set // is used to prevent a duplicate path from being generated SmallSet<BasicBlock *, 4> Successors; @@ -737,6 +751,7 @@ private: BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; std::vector<ThreadingPath> TPaths; + LoopInfo *LI; }; struct TransformDFA { @@ -1283,6 +1298,7 @@ bool DFAJumpThreading::run(Function &F) { SmallVector<AllSwitchPaths, 2> ThreadableLoops; bool MadeChanges = false; + LoopInfoBroken = false; for (BasicBlock &BB : F) { auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); @@ -1304,7 +1320,7 @@ bool DFAJumpThreading::run(Function &F) { if (!Switch.getSelectInsts().empty()) MadeChanges = true; - AllSwitchPaths SwitchPaths(&Switch, ORE); + AllSwitchPaths SwitchPaths(&Switch, ORE, LI); SwitchPaths.run(); if (SwitchPaths.getNumThreadingPaths() > 0) { @@ -1315,10 +1331,15 @@ bool DFAJumpThreading::run(Function &F) { // strict requirement but it can cause buggy behavior if there is an // overlap of blocks in different opportunities. There is a lot of room to // experiment with catching more opportunities here. + // NOTE: To release this contraint, we must handle LoopInfo invalidation break; } } +#ifdef NDEBUG + LI->verify(*DT); +#endif + SmallPtrSet<const Value *, 32> EphValues; if (ThreadableLoops.size() > 0) CodeMetrics::collectEphemeralValues(&F, AC, EphValues); @@ -1327,6 +1348,7 @@ bool DFAJumpThreading::run(Function &F) { TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); Transform.run(); MadeChanges = true; + LoopInfoBroken = true; } #ifdef EXPENSIVE_CHECKS @@ -1347,11 +1369,13 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F, LoopInfo &LI = AM.getResult<LoopAnalysis>(F); TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); OptimizationRemarkEmitter ORE(&F); - - if (!DFAJumpThreading(&AC, &DT, &LI, &TTI, &ORE).run(F)) + DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); + if (!ThreadImpl.run(F)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve<DominatorTreeAnalysis>(); + if (!ThreadImpl.LoopInfoBroken) + PA.preserve<LoopAnalysis>(); return PA; } |