mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	R600: New control flow for SI v2
This patch replaces the control flow handling with a new pass which structurizes the graph before transforming it to machine instructions. This has a couple of different advantages and currently fixes 20 piglit tests without a single regression. It is now a general purpose transformation that could be used not only for SI/R6xx, but also for other hardware implementations that use a form of structurized control flow. v2: further cleanup, fixes and documentation Patch by: Christian König Signed-off-by: Christian König <deathsimple@vodafone.de> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> Tested-by: Michel Dänzer <michel.daenzer@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170591 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -25,13 +25,14 @@ FunctionPass* createR600KernelParametersPass(const DataLayout *TD); | ||||
| FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); | ||||
|  | ||||
| // SI Passes | ||||
| FunctionPass *createSIAnnotateControlFlowPass(); | ||||
| FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); | ||||
| FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); | ||||
| FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); | ||||
| FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); | ||||
| FunctionPass *createSIFixSGPRLivenessPass(TargetMachine &tm); | ||||
|  | ||||
| // Passes common to R600 and SI | ||||
| Pass *createAMDGPUStructurizeCFGPass(); | ||||
| FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); | ||||
|  | ||||
| } // End namespace llvm | ||||
|   | ||||
							
								
								
									
										732
									
								
								lib/Target/R600/AMDGPUStructurizeCFG.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										732
									
								
								lib/Target/R600/AMDGPUStructurizeCFG.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,732 @@ | ||||
| //===-- AMDGPUStructurizeCFG.cpp -  ------------------===// | ||||
| // | ||||
| //                     The LLVM Compiler Infrastructure | ||||
| // | ||||
| // This file is distributed under the University of Illinois Open Source | ||||
| // License. See LICENSE.TXT for details. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
| // | ||||
| /// \file | ||||
| /// The pass implemented in this file transforms the program's control flow | ||||
| /// graph into a form that's suitable for code generation on hardware that | ||||
| /// implements control flow by execution masking. This currently includes all | ||||
| /// AMD GPUs but may as well be useful for other types of hardware. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| #include "AMDGPU.h" | ||||
| #include "llvm/Module.h" | ||||
| #include "llvm/ADT/SCCIterator.h" | ||||
| #include "llvm/Analysis/RegionIterator.h" | ||||
| #include "llvm/Analysis/RegionInfo.h" | ||||
| #include "llvm/Analysis/RegionPass.h" | ||||
| #include "llvm/Transforms/Utils/SSAUpdater.h" | ||||
|  | ||||
| using namespace llvm; | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| // Definition of the complex types used in this pass. | ||||
|  | ||||
| // A basic block together with a value belonging to it. | ||||
| typedef std::pair<BasicBlock *, Value *> BBValuePair; | ||||
| typedef ArrayRef<BasicBlock*> BBVecRef; | ||||
|  | ||||
| // Vectors of region nodes, basic blocks and block/value pairs. | ||||
| typedef SmallVector<RegionNode*, 8> RNVector; | ||||
| typedef SmallVector<BasicBlock*, 8> BBVector; | ||||
| typedef SmallVector<BBValuePair, 2> BBValueVector; | ||||
|  | ||||
| // PhiMap:       PHI node -> (incoming block, incoming value) pairs. | ||||
| // BBPhiMap:     block -> PhiMap of the PHI nodes it contains. | ||||
| // BBPredicates: block -> condition value attached to that block. | ||||
| // PredMap:      block -> BBPredicates collected for that block. | ||||
| // VisitedMap:   block -> number assigned while collecting infos. | ||||
| typedef DenseMap<PHINode *, BBValueVector> PhiMap; | ||||
| typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; | ||||
| typedef DenseMap<BasicBlock *, Value *> BBPredicates; | ||||
| typedef DenseMap<BasicBlock *, BBPredicates> PredMap; | ||||
| typedef DenseMap<BasicBlock *, unsigned> VisitedMap; | ||||
|  | ||||
| // The name for newly created blocks. | ||||
|  | ||||
| static const char *FlowBlockName = "Flow"; | ||||
|  | ||||
| /// @brief Transforms the control flow graph on one single entry/exit region | ||||
| /// at a time. | ||||
| /// | ||||
| /// After the transform all "If"/"Then"/"Else" style control flow looks like | ||||
| /// this: | ||||
| /// | ||||
| /// \verbatim | ||||
| /// 1 | ||||
| /// || | ||||
| /// | | | ||||
| /// 2 | | ||||
| /// | / | ||||
| /// |/    | ||||
| /// 3 | ||||
| /// ||   Where: | ||||
| /// | |  1 = "If" block, calculates the condition | ||||
| /// 4 |  2 = "Then" subregion, runs if the condition is true | ||||
| /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow | ||||
| /// |/   4 = "Else" optional subregion, runs if the condition is false | ||||
| /// 5    5 = "End" block, also rejoins the control flow | ||||
| /// \endverbatim | ||||
| /// | ||||
| /// Control flow is expressed as a branch where the true exit goes into the | ||||
| /// "Then"/"Else" region, while the false exit skips the region. | ||||
| /// The condition for the optional "Else" region is expressed as a PHI node. | ||||
| /// The incoming values of the PHI node are true for the "If" edge and false | ||||
| /// for the "Then" edge. | ||||
| /// | ||||
| /// In addition to that, even complicated loops look like this: | ||||
| /// | ||||
| /// \verbatim | ||||
| /// 1 | ||||
| /// || | ||||
| /// | | | ||||
| /// 2 ^  Where: | ||||
| /// | /  1 = "Entry" block | ||||
| /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block | ||||
| /// 3    3 = "Flow" block, with back edge to entry block | ||||
| /// | | ||||
| /// \endverbatim | ||||
| /// | ||||
| /// The back edge of the "Flow" block is always on the false side of the branch | ||||
| /// while the true side continues the general flow. So the loop condition | ||||
| /// consists of a network of PHI nodes where the true incoming values express | ||||
| /// breaks and the false values express continue states. | ||||
| class AMDGPUStructurizeCFG : public RegionPass { | ||||
|  | ||||
|   // Pass identification, handed to the RegionPass constructor. | ||||
|   static char ID; | ||||
|  | ||||
|   // The i1 type and its constants, set up in doInitialization(). | ||||
|   Type *Boolean; | ||||
|   ConstantInt *BoolTrue; | ||||
|   ConstantInt *BoolFalse; | ||||
|   UndefValue *BoolUndef; | ||||
|  | ||||
|   // Function and region currently processed, set in runOnRegion(). | ||||
|   Function *Func; | ||||
|   Region *ParentRegion; | ||||
|  | ||||
|   // Dominator tree analysis, fetched in runOnRegion(). | ||||
|   DominatorTree *DT; | ||||
|  | ||||
|   // Order:         region nodes as produced by orderNodes(). | ||||
|   // Visited:       block numbers assigned in collectInfos(). | ||||
|   // Predicates:    per block conditions built by analyzeBlock(). | ||||
|   // DeletedPhis:   PHI values removed by delPhiValues() until | ||||
|   //                addPhiValues() adds them back. | ||||
|   // FlowsInserted: blocks whose branches insertConditions() walks over. | ||||
|   RNVector Order; | ||||
|   VisitedMap Visited; | ||||
|   PredMap Predicates; | ||||
|   BBPhiMap DeletedPhis; | ||||
|   BBVector FlowsInserted; | ||||
|  | ||||
|   // Loop information gathered by analyzeLoop(): first and last block of | ||||
|   // the loop and the predicates of its back edges. | ||||
|   BasicBlock *LoopStart; | ||||
|   BasicBlock *LoopEnd; | ||||
|   BBPredicates LoopPred; | ||||
|  | ||||
|   // Fill Order from the SCC traversal of the parent region. | ||||
|   void orderNodes(); | ||||
|  | ||||
|   // Record the predicate of one outgoing edge of Term in Pred. | ||||
|   void buildPredicate(BranchInst *Term, unsigned Idx, | ||||
|                       BBPredicates &Pred, bool Invert); | ||||
|  | ||||
|   // Build the predicates of all edges leading into BB. | ||||
|   void analyzeBlock(BasicBlock *BB); | ||||
|  | ||||
|   // Detect back edges leaving BB and update the loop information. | ||||
|   void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx); | ||||
|  | ||||
|   // Reset and recompute Visited, Predicates and the loop information. | ||||
|   void collectInfos(); | ||||
|  | ||||
|   // Does A dominate every block with a predicate recorded for B? | ||||
|   bool dominatesPredicates(BasicBlock *A, BasicBlock *B); | ||||
|  | ||||
|   // Remove the terminator of BB and the PHI values it fed. | ||||
|   void killTerminator(BasicBlock *BB); | ||||
|  | ||||
|   // Skip over linear chains of nodes, see the definition for details. | ||||
|   RegionNode *skipChained(RegionNode *Node); | ||||
|  | ||||
|   // Remove and remember the PHI values coming from From into To. | ||||
|   void delPhiValues(BasicBlock *From, BasicBlock *To); | ||||
|  | ||||
|   // Add the remembered PHI values of To back for the new predecessor From. | ||||
|   void addPhiValues(BasicBlock *From, BasicBlock *To); | ||||
|  | ||||
|   // Create a new "Flow" block with Prev as its immediate dominator. | ||||
|   BasicBlock *getNextFlow(BasicBlock *Prev); | ||||
|  | ||||
|   // Are all predicates of Node constant true, with one dominating Prev? | ||||
|   bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node); | ||||
|  | ||||
|   // Connect Prev with Node and return the flow block following Node. | ||||
|   BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node); | ||||
|  | ||||
|   // Rebuild the control flow, leaving the branch conditions undefined. | ||||
|   void createFlow(); | ||||
|  | ||||
|   // Replace the undefined branch conditions with the real ones. | ||||
|   void insertConditions(); | ||||
|  | ||||
|   // Rewrite uses that are no longer dominated by their definition. | ||||
|   void rebuildSSA(); | ||||
|  | ||||
| public: | ||||
|   // Construct the pass and make sure RegionInfo is registered. | ||||
|   AMDGPUStructurizeCFG(): | ||||
|     RegionPass(ID) { | ||||
|  | ||||
|     initializeRegionInfoPass(*PassRegistry::getPassRegistry()); | ||||
|   } | ||||
|  | ||||
|   virtual bool doInitialization(Region *R, RGPassManager &RGM); | ||||
|  | ||||
|   virtual bool runOnRegion(Region *R, RGPassManager &RGM); | ||||
|  | ||||
|   virtual const char *getPassName() const { | ||||
|     return "AMDGPU simplify control flow"; | ||||
|   } | ||||
|  | ||||
|   // The dominator tree is both required and kept up to date by this pass. | ||||
|   void getAnalysisUsage(AnalysisUsage &AU) const { | ||||
|  | ||||
|     AU.addRequired<DominatorTree>(); | ||||
|     AU.addPreserved<DominatorTree>(); | ||||
|     RegionPass::getAnalysisUsage(AU); | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| } // end anonymous namespace | ||||
|  | ||||
| char AMDGPUStructurizeCFG::ID = 0; | ||||
|  | ||||
| /// \brief Set up the boolean type and the constants derived from it | ||||
| /// | ||||
| /// The context is taken from the entry block of \p R. Always returns false. | ||||
| bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { | ||||
|   LLVMContext &Ctx = R->getEntry()->getContext(); | ||||
|  | ||||
|   // One bit integer type plus its true, false and undef values. | ||||
|   Boolean = Type::getInt1Ty(Ctx); | ||||
|   BoolTrue = ConstantInt::getTrue(Ctx); | ||||
|   BoolFalse = ConstantInt::getFalse(Ctx); | ||||
|   BoolUndef = UndefValue::get(Boolean); | ||||
|  | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| /// \brief Rebuild the node order from the SCCs of the parent region | ||||
| void AMDGPUStructurizeCFG::orderNodes() { | ||||
|   Order.clear(); | ||||
|  | ||||
|   // Append the nodes of every SCC in the sequence the iterator yields them. | ||||
|   for (scc_iterator<Region *> SCC = scc_begin(ParentRegion), | ||||
|                               SCCEnd = scc_end(ParentRegion); | ||||
|        SCC != SCCEnd; ++SCC) { | ||||
|     std::vector<RegionNode *> &Members = *SCC; | ||||
|     Order.append(Members.begin(), Members.end()); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// \brief Build blocks and loop predicates | ||||
| /// | ||||
| /// Records in \p Pred the condition belonging to the edge that leaves | ||||
| /// \p Term through successor \p Idx. | ||||
| /// | ||||
| /// \param Term   branch instruction the edge originates from | ||||
| /// \param Idx    successor index of the edge | ||||
| /// \param Pred   predicate map that receives the result | ||||
| /// \param Invert swaps the constant true/false values and selects which | ||||
| ///               successor index gets the negated branch condition | ||||
| void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx, | ||||
|                                           BBPredicates &Pred, bool Invert) { | ||||
|  | ||||
|   // Constants to use, exchanged when the predicate is inverted | ||||
|   Value *True = Invert ? BoolFalse : BoolTrue; | ||||
|   Value *False = Invert ? BoolTrue : BoolFalse; | ||||
|  | ||||
|   RegionInfo *RI = ParentRegion->getRegionInfo(); | ||||
|   BasicBlock *BB = Term->getParent(); | ||||
|  | ||||
|   // Handle the case where multiple regions start at the same block | ||||
|   Region *R = BB != ParentRegion->getEntry() ? | ||||
|               RI->getRegionFor(BB) : ParentRegion; | ||||
|  | ||||
|   if (R == ParentRegion) { | ||||
|     // It's a top level block in our region | ||||
|     Value *Cond = True; | ||||
|     if (Term->isConditional()) { | ||||
|       BasicBlock *Other = Term->getSuccessor(!Idx); | ||||
|  | ||||
|       // The other successor is already numbered: only fill in constant | ||||
|       // predicates that are still missing and keep existing entries | ||||
|       if (Visited.count(Other)) { | ||||
|         if (!Pred.count(Other)) | ||||
|           Pred[Other] = False; | ||||
|  | ||||
|         if (!Pred.count(BB)) | ||||
|           Pred[BB] = True; | ||||
|         return; | ||||
|       } | ||||
|       Cond = Term->getCondition(); | ||||
|  | ||||
|       // Negate the condition for the false successor (or for the true | ||||
|       // successor when inverted); the new instruction goes before Term | ||||
|       if (Idx != Invert) | ||||
|         Cond = BinaryOperator::CreateNot(Cond, "", Term); | ||||
|     } | ||||
|  | ||||
|     Pred[BB] = Cond; | ||||
|  | ||||
|   } else if (ParentRegion->contains(R)) { | ||||
|     // It's a block in a sub region | ||||
|     // Walk up to the region that is a direct child of our parent region | ||||
|     while(R->getParent() != ParentRegion) | ||||
|       R = R->getParent(); | ||||
|  | ||||
|     Pred[R->getEntry()] = True; | ||||
|  | ||||
|   } else { | ||||
|     // It's a branch from outside into our parent region | ||||
|     Pred[BB] = True; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// \brief Build the predicates for every edge that enters \p BB | ||||
| /// | ||||
| /// Each predecessor terminator has to be a branch instruction. | ||||
| void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { | ||||
|   BBPredicates &Pred = Predicates[BB]; | ||||
|  | ||||
|   for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { | ||||
|     BranchInst *Term = cast<BranchInst>((*PI)->getTerminator()); | ||||
|     const unsigned NumSucc = Term->getNumSuccessors(); | ||||
|  | ||||
|     // Only the successor slots that actually point to BB are of interest. | ||||
|     for (unsigned Idx = 0; Idx != NumSucc; ++Idx) { | ||||
|       if (Term->getSuccessor(Idx) == BB) | ||||
|         buildPredicate(Term, Idx, Pred, false); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// \brief Look for back edges leaving \p BB and update the loop infos | ||||
| /// | ||||
| /// \p LoopIdx holds the smallest number of a back edge target seen so far | ||||
| /// and is lowered whenever a target with a smaller number is found. | ||||
| void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) { | ||||
|   BranchInst *Term = cast<BranchInst>(BB->getTerminator()); | ||||
|   const unsigned NumSucc = Term->getNumSuccessors(); | ||||
|  | ||||
|   for (unsigned Idx = 0; Idx != NumSucc; ++Idx) { | ||||
|     BasicBlock *Target = Term->getSuccessor(Idx); | ||||
|  | ||||
|     // Only an already numbered block can be the target of a back edge. | ||||
|     if (Visited.count(Target)) { | ||||
|       buildPredicate(Term, Idx, LoopPred, true); | ||||
|       LoopEnd = BB; | ||||
|  | ||||
|       // The loop starts at the back edge target with the lowest number. | ||||
|       const unsigned TargetIdx = Visited[Target]; | ||||
|       if (TargetIdx < LoopIdx) { | ||||
|         LoopIdx = TargetIdx; | ||||
|         LoopStart = Target; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// \brief Recompute the block numbers, predicates and loop infos | ||||
| /// | ||||
| /// Walks the node order back to front. A node gets its number only after it | ||||
| /// has been analyzed, so it does not count as visited during its own analysis. | ||||
| void AMDGPUStructurizeCFG::collectInfos() { | ||||
|   unsigned Number = 0; | ||||
|   unsigned LoopIdx = ~0; | ||||
|  | ||||
|   // Start from a clean state | ||||
|   Predicates.clear(); | ||||
|   LoopStart = 0; | ||||
|   LoopEnd = 0; | ||||
|   LoopPred.clear(); | ||||
|   Visited.clear(); | ||||
|  | ||||
|   for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); | ||||
|        OI != OE; ++OI) { | ||||
|     RegionNode *Node = *OI; | ||||
|     BasicBlock *Entry = Node->getEntry(); | ||||
|  | ||||
|     // Analyze all the conditions leading to the node | ||||
|     analyzeBlock(Entry); | ||||
|  | ||||
|     // Loop detection is only done for plain basic blocks | ||||
|     if (!Node->isSubRegion()) | ||||
|       analyzeLoop(Node->getNodeAs<BasicBlock>(), LoopIdx); | ||||
|  | ||||
|     Visited[Entry] = ++Number; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// \brief Check that A dominates every block with a predicate recorded for B | ||||
| /// | ||||
| /// Returns true as well when no predicate is recorded for B. | ||||
| bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) { | ||||
|   BBPredicates &Incoming = Predicates[B]; | ||||
|   BBPredicates::iterator It = Incoming.begin(), End = Incoming.end(); | ||||
|  | ||||
|   // Stop at the first predicate block that is not dominated by A | ||||
|   while (It != End && DT->dominates(A, It->first)) | ||||
|     ++It; | ||||
|  | ||||
|   return It == End; | ||||
| } | ||||
|  | ||||
| /// \brief Remove phi values from all successors and then remove the | ||||
| /// terminator. Does nothing if the block has no terminator. | ||||
| void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { | ||||
|   if (TerminatorInst *Term = BB->getTerminator()) { | ||||
|     // Take the incoming values of BB out of the PHI nodes of each successor | ||||
|     succ_iterator SI = succ_begin(BB), SE = succ_end(BB); | ||||
|     while (SI != SE) { | ||||
|       delPhiValues(BB, *SI); | ||||
|       ++SI; | ||||
|     } | ||||
|  | ||||
|     Term->eraseFromParent(); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// First: Skip forward to the first region node that either isn't a subregion | ||||
| /// or doesn't dominate its exit, remove all the skipped nodes from the node | ||||
| /// order. | ||||
| /// | ||||
| /// Second: Handle the first successor directly if the resulting node's | ||||
| /// successor predicates are still dominated by the original entry | ||||
| /// | ||||
| /// \returns the node the caller has to continue with | ||||
| RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) { | ||||
|  | ||||
|   // Entry of the node we started with, used for the dominance checks below | ||||
|   BasicBlock *Entry = Node->getEntry(); | ||||
|  | ||||
|   // Skip forward as long as it is just a linear flow | ||||
|   while (true) { | ||||
|     // NOTE: shadows the outer Entry on purpose, this is the current node's | ||||
|     // entry while the outer one keeps pointing to the original node | ||||
|     BasicBlock *Entry = Node->getEntry(); | ||||
|     BasicBlock *Exit; | ||||
|  | ||||
|     if (Node->isSubRegion()) { | ||||
|       Exit = Node->getNodeAs<Region>()->getExit(); | ||||
|     } else { | ||||
|       // A plain block is only part of a chain if it has a single successor | ||||
|       TerminatorInst *Term = Entry->getTerminator(); | ||||
|       if (Term->getNumSuccessors() != 1) | ||||
|         break; | ||||
|       Exit = Term->getSuccessor(0); | ||||
|     } | ||||
|  | ||||
|     // It's a back edge, break here so we can insert a loop node | ||||
|     if (!Visited.count(Exit)) | ||||
|       return Node; | ||||
|  | ||||
|     // More than just this node's edges are pointing to exit | ||||
|     if (!DT->dominates(Entry, Exit)) | ||||
|       return Node; | ||||
|  | ||||
|     RegionNode *Next = ParentRegion->getNode(Exit); | ||||
|     RNVector::iterator I = std::find(Order.begin(), Order.end(), Next); | ||||
|     assert(I != Order.end()); | ||||
|  | ||||
|     // The chained node is handled here, take it out of the pending work | ||||
|     Visited.erase(Next->getEntry()); | ||||
|     Order.erase(I); | ||||
|     Node = Next; | ||||
|   } | ||||
|  | ||||
|   BasicBlock *BB = Node->getEntry(); | ||||
|   TerminatorInst *Term = BB->getTerminator(); | ||||
|   if (Term->getNumSuccessors() != 2) | ||||
|     return Node; | ||||
|  | ||||
|   // Our node has exactly two successors, check if we can handle | ||||
|   // any of them directly | ||||
|   BasicBlock *Succ = Term->getSuccessor(0); | ||||
|   if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) { | ||||
|     Succ = Term->getSuccessor(1); | ||||
|     if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) | ||||
|       return Node; | ||||
|   } else { | ||||
|     // Both successors may qualify, prefer the one with the lower number | ||||
|     BasicBlock *Succ2 = Term->getSuccessor(1); | ||||
|     if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] && | ||||
|         dominatesPredicates(Entry, Succ2)) | ||||
|       Succ = Succ2; | ||||
|   } | ||||
|  | ||||
|   RegionNode *Next = ParentRegion->getNode(Succ); | ||||
|   RNVector::iterator E = Order.end(); | ||||
|   RNVector::iterator I = std::find(Order.begin(), E, Next); | ||||
|   assert(I != E); | ||||
|  | ||||
|   // Wire the chosen successor right behind BB; wireFlowBlock calls back | ||||
|   // into skipChained for the successor node | ||||
|   killTerminator(BB); | ||||
|   FlowsInserted.push_back(BB); | ||||
|   Visited.erase(Succ); | ||||
|   Order.erase(I); | ||||
|   return ParentRegion->getNode(wireFlowBlock(BB, Next)); | ||||
| } | ||||
|  | ||||
| /// \brief Remove all PHI values coming from "From" into "To" and remember | ||||
| /// them in DeletedPhis | ||||
| /// | ||||
| /// The PHI nodes themselves are kept, even if they end up without any | ||||
| /// incoming value. | ||||
| void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { | ||||
|  | ||||
|   PhiMap &Map = DeletedPhis[To]; | ||||
|   // Only look at the PHI nodes at the start of the block | ||||
|   for (BasicBlock::iterator I = To->begin(), E = To->end(); | ||||
|        I != E && isa<PHINode>(*I);) { | ||||
|  | ||||
|     PHINode &Phi = cast<PHINode>(*I++); | ||||
|     // "From" may be listed more than once, remove every occurrence | ||||
|     while (Phi.getBasicBlockIndex(From) != -1) { | ||||
|       // false: don't delete the PHI node when it becomes empty | ||||
|       Value *Deleted = Phi.removeIncomingValue(From, false); | ||||
|       Map[&Phi].push_back(std::make_pair(From, Deleted)); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// \brief Add the PHI values back once we know the new predecessor | ||||
| /// | ||||
| /// For every PHI node remembered for "To" the value reaching the end of | ||||
| /// "From" is computed with an SSAUpdater and added as the incoming value for | ||||
| /// "From". The remembered values of "To" are dropped afterwards. | ||||
| void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { | ||||
|  | ||||
|   // Nothing was removed from this block | ||||
|   if (!DeletedPhis.count(To)) | ||||
|     return; | ||||
|  | ||||
|   PhiMap &Map = DeletedPhis[To]; | ||||
|   SSAUpdater Updater; | ||||
|  | ||||
|   for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { | ||||
|  | ||||
|     PHINode *Phi = I->first; | ||||
|     Updater.Initialize(Phi->getType(), ""); | ||||
|     // Fallback ends up as the nearest common dominator of "To" and all | ||||
|     // blocks a value was deleted for; HaveFallback tracks whether one of | ||||
|     // those blocks is that dominator and therefore already has a value | ||||
|     BasicBlock *Fallback = To; | ||||
|     bool HaveFallback = false; | ||||
|  | ||||
|     for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end(); | ||||
|          VI != VE; ++VI) { | ||||
|  | ||||
|       Updater.AddAvailableValue(VI->first, VI->second); | ||||
|       BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first); | ||||
|       if (Dom == VI->first) | ||||
|         HaveFallback = true; | ||||
|       else if (Dom != Fallback) | ||||
|         HaveFallback = false; | ||||
|       Fallback = Dom; | ||||
|     } | ||||
|     // No value is available at the common dominator, use undef there | ||||
|     if (!HaveFallback) { | ||||
|       Value *Undef = UndefValue::get(Phi->getType()); | ||||
|       Updater.AddAvailableValue(Fallback, Undef); | ||||
|     } | ||||
|  | ||||
|     Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From); | ||||
|   } | ||||
|   DeletedPhis.erase(To); | ||||
| } | ||||
|  | ||||
| /// \brief Create a new flow block, register it in the dominator tree and | ||||
| /// the region info and remember it in FlowsInserted | ||||
| /// | ||||
| /// \param Prev becomes the immediate dominator of the new block | ||||
| BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { | ||||
|   // The block is placed in front of the next pending node, or in front of | ||||
|   // the region exit when no node is pending anymore | ||||
|   BasicBlock *InsertBefore; | ||||
|   if (Order.empty()) | ||||
|     InsertBefore = ParentRegion->getExit(); | ||||
|   else | ||||
|     InsertBefore = Order.back()->getEntry(); | ||||
|  | ||||
|   BasicBlock *Flow = BasicBlock::Create(Func->getContext(), FlowBlockName, | ||||
|                                         Func, InsertBefore); | ||||
|  | ||||
|   DT->addNewBlock(Flow, Prev); | ||||
|   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); | ||||
|   FlowsInserted.push_back(Flow); | ||||
|   return Flow; | ||||
| } | ||||
|  | ||||
| /// \brief Can we predict that this node will always be called? | ||||
| /// | ||||
| /// That is the case when every predicate of \p Node is the constant true | ||||
| /// and at least one of the predicate blocks dominates \p Prev. | ||||
| bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev, | ||||
|                                              BasicBlock *Node) { | ||||
|   BBPredicates &Preds = Predicates[Node]; | ||||
|   bool Dominated = false; | ||||
|  | ||||
|   for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); | ||||
|        PI != PE; ++PI) { | ||||
|     // A single non constant-true predicate rules it out | ||||
|     if (PI->second != BoolTrue) | ||||
|       return false; | ||||
|  | ||||
|     // The dominator tree is no longer queried once a match was found | ||||
|     Dominated = Dominated || DT->dominates(PI->first, Prev); | ||||
|   } | ||||
|  | ||||
|   return Dominated; | ||||
| } | ||||
|  | ||||
| /// \brief Wire up the new control flow by inserting or updating the branch | ||||
| /// instructions at node exits | ||||
| /// | ||||
| /// \param Prev block that gets the branch to the entry of \p Node; it must | ||||
| ///             not have a terminator when this function is called | ||||
| /// \param Node region node to wire up behind \p Prev | ||||
| /// \returns the newly created flow block the node exits now lead to | ||||
| BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, | ||||
|                                                 RegionNode *Node) { | ||||
|  | ||||
|   BasicBlock *Entry = Node->getEntry(); | ||||
|  | ||||
|   // The loop now starts at Prev, which always enters the loop body | ||||
|   if (LoopStart == Entry) { | ||||
|     LoopStart = Prev; | ||||
|     LoopPred[Prev] = BoolTrue; | ||||
|   } | ||||
|  | ||||
|   // Wire it up temporary, skipChained may recurse into us | ||||
|   BranchInst::Create(Entry, Prev); | ||||
|   DT->changeImmediateDominator(Entry, Prev); | ||||
|   addPhiValues(Prev, Entry); | ||||
|  | ||||
|   // From here on Node is the last node of the chain starting at Entry | ||||
|   Node = skipChained(Node); | ||||
|  | ||||
|   BasicBlock *Next = getNextFlow(Prev); | ||||
|   if (!isPredictableTrue(Prev, Entry)) { | ||||
|     // Let Prev point to entry and next block | ||||
|     // The condition stays undefined until insertConditions() runs | ||||
|     Prev->getTerminator()->eraseFromParent(); | ||||
|     BranchInst::Create(Entry, Next, BoolUndef, Prev); | ||||
|   } else { | ||||
|     // Entry is always executed, so Next is dominated by it instead of Prev | ||||
|     DT->changeImmediateDominator(Next, Entry); | ||||
|   } | ||||
|  | ||||
|   // Let node exit(s) point to next block | ||||
|   if (Node->isSubRegion()) { | ||||
|     Region *SubRegion = Node->getNodeAs<Region>(); | ||||
|     BasicBlock *Exit = SubRegion->getExit(); | ||||
|  | ||||
|     // Find all the edges from the sub region to the exit | ||||
|     // (collected first, since the edges are modified afterwards) | ||||
|     BBVector ToDo; | ||||
|     for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { | ||||
|       if (SubRegion->contains(*I)) | ||||
|         ToDo.push_back(*I); | ||||
|     } | ||||
|  | ||||
|     // Modify the edges to point to the new flow block | ||||
|     for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) { | ||||
|       delPhiValues(*I, Exit); | ||||
|       TerminatorInst *Term = (*I)->getTerminator(); | ||||
|       Term->replaceUsesOfWith(Exit, Next); | ||||
|     } | ||||
|  | ||||
|     // Update the region info | ||||
|     SubRegion->replaceExit(Next); | ||||
|  | ||||
|   } else { | ||||
|     // A plain block simply gets a new unconditional branch to Next | ||||
|     BasicBlock *BB = Node->getNodeAs<BasicBlock>(); | ||||
|     killTerminator(BB); | ||||
|     BranchInst::Create(Next, BB); | ||||
|  | ||||
|     // The old loop end lost its back edge, createFlow() makes a new one | ||||
|     if (BB == LoopEnd) | ||||
|       LoopEnd = 0; | ||||
|   } | ||||
|  | ||||
|   return Next; | ||||
| } | ||||
|  | ||||
| /// Destroy node order and visited map, build up flow order instead. | ||||
| /// After this function control flow looks like it should be, but | ||||
| /// branches only have undefined conditions. | ||||
| /// | ||||
| /// Expects Order to hold at least the entry node of the parent region as | ||||
| /// its last element. | ||||
| void AMDGPUStructurizeCFG::createFlow() { | ||||
|  | ||||
|   DeletedPhis.clear(); | ||||
|  | ||||
|   BasicBlock *Prev = Order.pop_back_val()->getEntry(); | ||||
|   assert(Prev == ParentRegion->getEntry() && "Incorrect node order!"); | ||||
|   Visited.erase(Prev); | ||||
|  | ||||
|   if (LoopStart == Prev) { | ||||
|     // Loop starts at entry, split entry so that we can predicate it | ||||
|     BasicBlock::iterator Insert = Prev->getFirstInsertionPt(); | ||||
|     BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName); | ||||
|     DT->addNewBlock(Split, Prev); | ||||
|     ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); | ||||
|     Predicates[Split] = Predicates[Prev]; | ||||
|     Order.push_back(ParentRegion->getBBNode(Split)); | ||||
|     LoopPred[Prev] = BoolTrue; | ||||
|  | ||||
|   } else if (!Order.empty() && LoopStart == Order.back()->getEntry()) { | ||||
|     // Order may be empty here when the region consists of the entry node | ||||
|     // only, so it has to be checked before looking at the last element. | ||||
|  | ||||
|     // Loop starts behind entry, split entry so that we can jump to it | ||||
|     Instruction *Term = Prev->getTerminator(); | ||||
|     BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName); | ||||
|     DT->addNewBlock(Split, Prev); | ||||
|     ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); | ||||
|     Prev = Split; | ||||
|   } | ||||
|  | ||||
|   killTerminator(Prev); | ||||
|   FlowsInserted.clear(); | ||||
|   FlowsInserted.push_back(Prev); | ||||
|  | ||||
|   while (!Order.empty()) { | ||||
|     RegionNode *Node = Order.pop_back_val(); | ||||
|     Visited.erase(Node->getEntry()); | ||||
|     Prev = wireFlowBlock(Prev, Node); | ||||
|     if (LoopStart && !LoopEnd) { | ||||
|       // Create an extra loop end node | ||||
|       LoopEnd = Prev; | ||||
|       Prev = getNextFlow(LoopEnd); | ||||
|       BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd); | ||||
|       addPhiValues(LoopEnd, LoopStart); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Finally connect the last flow block with the region exit | ||||
|   BasicBlock *Exit = ParentRegion->getExit(); | ||||
|   BranchInst::Create(Exit, Prev); | ||||
|   addPhiValues(Prev, Exit); | ||||
|   if (DT->dominates(ParentRegion->getEntry(), Exit)) | ||||
|     DT->changeImmediateDominator(Exit, Prev); | ||||
|  | ||||
|   if (LoopStart && LoopEnd) { | ||||
|     // Add the PHI values back for the flow blocks from the loop start up | ||||
|     // to, but not including, the loop end. The walk is bounded by the end | ||||
|     // of the vector so that it can't run past it when one of the two | ||||
|     // blocks isn't found. | ||||
|     BBVector::iterator FE = FlowsInserted.end(); | ||||
|     BBVector::iterator FI = std::find(FlowsInserted.begin(), FE, LoopStart); | ||||
|     for (; FI != FE && *FI != LoopEnd; ++FI) { | ||||
|       addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0)); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   assert(Order.empty()); | ||||
|   assert(Visited.empty()); | ||||
|   assert(DeletedPhis.empty()); | ||||
| } | ||||
|  | ||||
| /// \brief Insert the missing branch conditions | ||||
| /// | ||||
| /// Every conditional branch ending a block in FlowsInserted gets its | ||||
| /// condition from an SSAUpdater that is fed with the predicates of the | ||||
| /// first successor, or with the loop predicates for the loop end block. | ||||
| void AMDGPUStructurizeCFG::insertConditions() { | ||||
|  | ||||
|   SSAUpdater PhiInserter; | ||||
|  | ||||
|   for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end(); | ||||
|        FI != FE; ++FI) { | ||||
|  | ||||
|     // Unconditional branches don't need a condition | ||||
|     BranchInst *Term = cast<BranchInst>((*FI)->getTerminator()); | ||||
|     if (Term->isUnconditional()) | ||||
|       continue; | ||||
|  | ||||
|     // False is the value available at the function entry | ||||
|     PhiInserter.Initialize(Boolean, ""); | ||||
|     PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse); | ||||
|  | ||||
|     BasicBlock *Succ = Term->getSuccessor(0); | ||||
|     BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ]; | ||||
|     for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); | ||||
|          PI != PE; ++PI) { | ||||
|  | ||||
|       PhiInserter.AddAvailableValue(PI->first, PI->second); | ||||
|     } | ||||
|  | ||||
|     Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// Handle a rare case where the disintegrated nodes instructions | ||||
| /// no longer dominate all their uses. Not sure if this is really necessary | ||||
| /// | ||||
| /// Every use outside of the defining block that is not dominated by the | ||||
| /// definition is rewritten with an SSAUpdater that knows the instruction | ||||
| /// itself and an undef value at the function entry. | ||||
| void AMDGPUStructurizeCFG::rebuildSSA() { | ||||
|  | ||||
|   SSAUpdater Updater; | ||||
|   for (Region::block_iterator I = ParentRegion->block_begin(), | ||||
|                               E = ParentRegion->block_end(); | ||||
|        I != E; ++I) { | ||||
|  | ||||
|     BasicBlock *BB = *I; | ||||
|     for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); | ||||
|          II != IE; ++II) { | ||||
|  | ||||
|       // Without any use there is nothing to rewrite, and asking the end | ||||
|       // iterator for its Use below would not be valid | ||||
|       if (II->use_empty()) | ||||
|         continue; | ||||
|  | ||||
|       bool Initialized = false; | ||||
|       // Walk the use list by hand and fetch the next entry up front, | ||||
|       // rewriting a use takes it out of this list | ||||
|       for (Use *U = &II->use_begin().getUse(), *Next; U; U = Next) { | ||||
|  | ||||
|         Next = U->getNext(); | ||||
|  | ||||
|         // Uses inside the defining block are fine, as are PHI nodes | ||||
|         // that receive the value from the defining block | ||||
|         Instruction *User = cast<Instruction>(U->getUser()); | ||||
|         if (User->getParent() == BB) { | ||||
|           continue; | ||||
|  | ||||
|         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { | ||||
|           if (UserPN->getIncomingBlock(*U) == BB) | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         if (DT->dominates(II, User)) | ||||
|           continue; | ||||
|  | ||||
|         // Set up the updater only once per instruction | ||||
|         if (!Initialized) { | ||||
|           Value *Undef = UndefValue::get(II->getType()); | ||||
|           Updater.Initialize(II->getType(), ""); | ||||
|           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); | ||||
|           Updater.AddAvailableValue(BB, II); | ||||
|           Initialized = true; | ||||
|         } | ||||
|         Updater.RewriteUseAfterInsertions(*U); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// \brief Structurize the control flow of a single region | ||||
| /// | ||||
| /// \returns false for the top level region, which is left alone, and true | ||||
| /// for every other region | ||||
| bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { | ||||
|   if (R->isTopLevelRegion()) | ||||
|     return false; | ||||
|  | ||||
|   // Remember what we are working on | ||||
|   ParentRegion = R; | ||||
|   Func = ParentRegion->getEntry()->getParent(); | ||||
|   DT = &getAnalysis<DominatorTree>(); | ||||
|  | ||||
|   // Gather the information first, then rewrite the control flow | ||||
|   orderNodes(); | ||||
|   collectInfos(); | ||||
|   createFlow(); | ||||
|   insertConditions(); | ||||
|   rebuildSSA(); | ||||
|  | ||||
|   // Drop the per region state again | ||||
|   FlowsInserted.clear(); | ||||
|   DeletedPhis.clear(); | ||||
|   Predicates.clear(); | ||||
|   Visited.clear(); | ||||
|   Order.clear(); | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| /// \brief Create the pass | ||||
| /// | ||||
| /// \returns a newly allocated AMDGPUStructurizeCFG instance | ||||
| Pass *llvm::createAMDGPUStructurizeCFGPass() { | ||||
|   return new AMDGPUStructurizeCFG(); | ||||
| } | ||||
| @@ -91,6 +91,11 @@ TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) { | ||||
|  | ||||
| // Adds the CFG structurizer followed by the SI control flow annotation pass | ||||
| // for device generations newer than HD6XXX. Always returns false. | ||||
| bool | ||||
| AMDGPUPassConfig::addPreISel() { | ||||
|   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); | ||||
|   if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { | ||||
|     addPass(createAMDGPUStructurizeCFGPass()); | ||||
|     addPass(createSIAnnotateControlFlowPass()); | ||||
|   } | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| @@ -107,9 +112,6 @@ bool AMDGPUPassConfig::addPreRegAlloc() { | ||||
|     addPass(createSIAssignInterpRegsPass(*TM)); | ||||
|   } | ||||
|   addPass(createAMDGPUConvertToISAPass(*TM)); | ||||
|   if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { | ||||
|     addPass(createSIFixSGPRLivenessPass(*TM)); | ||||
|   } | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| @@ -124,11 +126,10 @@ bool AMDGPUPassConfig::addPreSched2() { | ||||
| } | ||||
|  | ||||
| bool AMDGPUPassConfig::addPreEmitPass() { | ||||
|   addPass(createAMDGPUCFGPreparationPass(*TM)); | ||||
|   addPass(createAMDGPUCFGStructurizerPass(*TM)); | ||||
|  | ||||
|   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); | ||||
|   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { | ||||
|     addPass(createAMDGPUCFGPreparationPass(*TM)); | ||||
|     addPass(createAMDGPUCFGStructurizerPass(*TM)); | ||||
|     addPass(createR600ExpandSpecialInstrsPass(*TM)); | ||||
|     addPass(&FinalizeMachineBundlesID); | ||||
|   } else { | ||||
|   | ||||
| @@ -2596,7 +2596,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { | ||||
|     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; | ||||
|     case AMDGPU::BRANCH_COND_i32: | ||||
|     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; | ||||
|     case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ; | ||||
|     default: | ||||
|       assert(0 && "internal error"); | ||||
|     } | ||||
| @@ -2608,7 +2607,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { | ||||
|     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; | ||||
|     case AMDGPU::BRANCH_COND_i32: | ||||
|     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; | ||||
|     case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z; | ||||
|     default: | ||||
|       assert(0 && "internal error"); | ||||
|     } | ||||
| @@ -2658,8 +2656,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { | ||||
|         return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; | ||||
|       case AMDGPU::BRANCH_COND_i32: | ||||
|       case AMDGPU::BRANCH_COND_f32: | ||||
|       case AMDGPU::SI_IF_NZ: | ||||
|       case AMDGPU::SI_IF_Z: | ||||
|       break; | ||||
|     default: | ||||
|       return false; | ||||
|   | ||||
| @@ -206,68 +206,3 @@ multiclass BranchInstr2<string name> { | ||||
| // Intrinsics support | ||||
| //===--------------------------------------------------------------------===// | ||||
| include "AMDILIntrinsics.td" | ||||
|  | ||||
| //===--------------------------------------------------------------------===// | ||||
| // Instructions support | ||||
| //===--------------------------------------------------------------------===// | ||||
| //===---------------------------------------------------------------------===// | ||||
| // Custom Inserter for Branches and returns, this eventually will be a | ||||
| // separate pass | ||||
| //===---------------------------------------------------------------------===// | ||||
| let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { | ||||
|   def BRANCH : ILFormat<(outs), (ins brtarget:$target), | ||||
|       "; Pseudo unconditional branch instruction", | ||||
|       [(br bb:$target)]>; | ||||
|   defm BRANCH_COND : BranchConditional<IL_brcond>; | ||||
| } | ||||
|  | ||||
| //===---------------------------------------------------------------------===// | ||||
| // Flow and Program control Instructions | ||||
| //===---------------------------------------------------------------------===// | ||||
| let isTerminator=1 in { | ||||
|   def SWITCH      : ILFormat< (outs), (ins GPRI32:$src), | ||||
|   !strconcat("SWITCH", " $src"), []>; | ||||
|   def CASE        : ILFormat< (outs), (ins GPRI32:$src), | ||||
|       !strconcat("CASE", " $src"), []>; | ||||
|   def BREAK       : ILFormat< (outs), (ins), | ||||
|       "BREAK", []>; | ||||
|   def CONTINUE    : ILFormat< (outs), (ins), | ||||
|       "CONTINUE", []>; | ||||
|   def DEFAULT     : ILFormat< (outs), (ins), | ||||
|       "DEFAULT", []>; | ||||
|   def ELSE        : ILFormat< (outs), (ins), | ||||
|       "ELSE", []>; | ||||
|   def ENDSWITCH   : ILFormat< (outs), (ins), | ||||
|       "ENDSWITCH", []>; | ||||
|   def ENDMAIN     : ILFormat< (outs), (ins), | ||||
|       "ENDMAIN", []>; | ||||
|   def END         : ILFormat< (outs), (ins), | ||||
|       "END", []>; | ||||
|   def ENDFUNC     : ILFormat< (outs), (ins), | ||||
|       "ENDFUNC", []>; | ||||
|   def ENDIF       : ILFormat< (outs), (ins), | ||||
|       "ENDIF", []>; | ||||
|   def WHILELOOP   : ILFormat< (outs), (ins), | ||||
|       "WHILE", []>; | ||||
|   def ENDLOOP     : ILFormat< (outs), (ins), | ||||
|       "ENDLOOP", []>; | ||||
|   def FUNC        : ILFormat< (outs), (ins), | ||||
|       "FUNC", []>; | ||||
|   def RETDYN      : ILFormat< (outs), (ins), | ||||
|       "RET_DYN", []>; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; | ||||
|   defm IFC         : BranchInstr2<"IFC">; | ||||
|   defm BREAKC      : BranchInstr2<"BREAKC">; | ||||
|   defm CONTINUEC   : BranchInstr2<"CONTINUEC">; | ||||
| } | ||||
|   | ||||
| @@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen | ||||
|   AMDGPUAsmPrinter.cpp | ||||
|   AMDGPUMCInstLower.cpp | ||||
|   AMDGPUSubtarget.cpp | ||||
|   AMDGPUStructurizeCFG.cpp | ||||
|   AMDGPUTargetMachine.cpp | ||||
|   AMDGPUISelLowering.cpp | ||||
|   AMDGPUConvertToISA.cpp | ||||
| @@ -37,6 +38,7 @@ add_llvm_target(R600CodeGen | ||||
|   R600ISelLowering.cpp | ||||
|   R600MachineFunctionInfo.cpp | ||||
|   R600RegisterInfo.cpp | ||||
|   SIAnnotateControlFlow.cpp | ||||
|   SIAssignInterpRegs.cpp | ||||
|   SIInstrInfo.cpp | ||||
|   SIISelLowering.cpp | ||||
| @@ -44,7 +46,6 @@ add_llvm_target(R600CodeGen | ||||
|   SILowerControlFlow.cpp | ||||
|   SIMachineFunctionInfo.cpp | ||||
|   SIRegisterInfo.cpp | ||||
|   SIFixSGPRLiveness.cpp | ||||
|   ) | ||||
|  | ||||
| add_dependencies(LLVMR600CodeGen intrinsics_gen) | ||||
|   | ||||
| @@ -1545,6 +1545,71 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { | ||||
|       "RETURN", [(IL_retflag)]>; | ||||
| } | ||||
|  | ||||
| //===--------------------------------------------------------------------===// | ||||
| // Instructions support | ||||
| //===--------------------------------------------------------------------===// | ||||
| //===---------------------------------------------------------------------===// | ||||
| // Custom Inserter for Branches and returns, this eventually will be a | ||||
| // separate pass | ||||
| //===---------------------------------------------------------------------===// | ||||
| let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { | ||||
|   def BRANCH : ILFormat<(outs), (ins brtarget:$target), | ||||
|       "; Pseudo unconditional branch instruction", | ||||
|       [(br bb:$target)]>; | ||||
|   defm BRANCH_COND : BranchConditional<IL_brcond>; | ||||
| } | ||||
|  | ||||
| //===---------------------------------------------------------------------===// | ||||
| // Flow and Program control Instructions | ||||
| //===---------------------------------------------------------------------===// | ||||
| let isTerminator=1 in { | ||||
|   def SWITCH      : ILFormat< (outs), (ins GPRI32:$src), | ||||
|   !strconcat("SWITCH", " $src"), []>; | ||||
|   def CASE        : ILFormat< (outs), (ins GPRI32:$src), | ||||
|       !strconcat("CASE", " $src"), []>; | ||||
|   def BREAK       : ILFormat< (outs), (ins), | ||||
|       "BREAK", []>; | ||||
|   def CONTINUE    : ILFormat< (outs), (ins), | ||||
|       "CONTINUE", []>; | ||||
|   def DEFAULT     : ILFormat< (outs), (ins), | ||||
|       "DEFAULT", []>; | ||||
|   def ELSE        : ILFormat< (outs), (ins), | ||||
|       "ELSE", []>; | ||||
|   def ENDSWITCH   : ILFormat< (outs), (ins), | ||||
|       "ENDSWITCH", []>; | ||||
|   def ENDMAIN     : ILFormat< (outs), (ins), | ||||
|       "ENDMAIN", []>; | ||||
|   def END         : ILFormat< (outs), (ins), | ||||
|       "END", []>; | ||||
|   def ENDFUNC     : ILFormat< (outs), (ins), | ||||
|       "ENDFUNC", []>; | ||||
|   def ENDIF       : ILFormat< (outs), (ins), | ||||
|       "ENDIF", []>; | ||||
|   def WHILELOOP   : ILFormat< (outs), (ins), | ||||
|       "WHILE", []>; | ||||
|   def ENDLOOP     : ILFormat< (outs), (ins), | ||||
|       "ENDLOOP", []>; | ||||
|   def FUNC        : ILFormat< (outs), (ins), | ||||
|       "FUNC", []>; | ||||
|   def RETDYN      : ILFormat< (outs), (ins), | ||||
|       "RET_DYN", []>; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; | ||||
|   // This opcode has custom swizzle pattern encoded in Swizzle Encoder | ||||
|   defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; | ||||
|   defm IFC         : BranchInstr2<"IFC">; | ||||
|   defm BREAKC      : BranchInstr2<"BREAKC">; | ||||
|   defm CONTINUEC   : BranchInstr2<"CONTINUEC">; | ||||
| } | ||||
|  | ||||
| //===----------------------------------------------------------------------===// | ||||
| // ISel Patterns | ||||
| //===----------------------------------------------------------------------===// | ||||
|   | ||||
							
								
								
									
										337
									
								
								lib/Target/R600/SIAnnotateControlFlow.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										337
									
								
								lib/Target/R600/SIAnnotateControlFlow.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,337 @@ | ||||
| //===-- SIAnnotateControlFlow.cpp - Annotate SI control flow --------------===// | ||||
| // | ||||
| //                     The LLVM Compiler Infrastructure | ||||
| // | ||||
| // This file is distributed under the University of Illinois Open Source | ||||
| // License. See LICENSE.TXT for details. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
| // | ||||
| /// \file | ||||
| /// Annotates the control flow with hardware specific intrinsics. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| #include "AMDGPU.h" | ||||
|  | ||||
| #include "llvm/Pass.h" | ||||
| #include "llvm/Module.h" | ||||
| #include "llvm/Analysis/Dominators.h" | ||||
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||
| #include "llvm/ADT/DepthFirstIterator.h" | ||||
| #include "llvm/Transforms/Utils/SSAUpdater.h" | ||||
|  | ||||
| using namespace llvm; | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| // Complex types used in this pass: a stack entry pairs a basic block with | ||||
| // the value saved for it | ||||
| typedef std::pair<BasicBlock *, Value *> StackEntry; | ||||
| typedef SmallVector<StackEntry, 16> StackVector; | ||||
|  | ||||
| // Intrinsic names the control flow is annotated with. The pointers are const | ||||
| // as well, so a name can not be redirected to another string by accident. | ||||
| static const char *const IfIntrinsic = "llvm.SI.if"; | ||||
| static const char *const ElseIntrinsic = "llvm.SI.else"; | ||||
| static const char *const BreakIntrinsic = "llvm.SI.break"; | ||||
| static const char *const IfBreakIntrinsic = "llvm.SI.if.break"; | ||||
| static const char *const ElseBreakIntrinsic = "llvm.SI.else.break"; | ||||
| static const char *const LoopIntrinsic = "llvm.SI.loop"; | ||||
| static const char *const EndCfIntrinsic = "llvm.SI.end.cf"; | ||||
|  | ||||
| /// \brief Function pass that annotates the control flow of a function with | ||||
| /// the llvm.SI.* intrinsics. | ||||
| class SIAnnotateControlFlow : public FunctionPass { | ||||
|  | ||||
|   static char ID; | ||||
|  | ||||
|   // Types used by the intrinsic signatures, set up in doInitialization() | ||||
|   Type *Boolean; | ||||
|   Type *Void; | ||||
|   Type *Int64; | ||||
|   Type *ReturnStruct; | ||||
|  | ||||
|   // Constants set up in doInitialization() | ||||
|   ConstantInt *BoolTrue; | ||||
|   ConstantInt *BoolFalse; | ||||
|   UndefValue *BoolUndef; | ||||
|   Constant *Int64Zero; | ||||
|  | ||||
|   // Declarations of the annotation intrinsics | ||||
|   Constant *If; | ||||
|   Constant *Else; | ||||
|   Constant *Break; | ||||
|   Constant *IfBreak; | ||||
|   Constant *ElseBreak; | ||||
|   Constant *Loop; | ||||
|   Constant *EndCf; | ||||
|  | ||||
|   // State used while a function is processed | ||||
|   DominatorTree *DT; | ||||
|   StackVector Stack; | ||||
|   SSAUpdater PhiInserter; | ||||
|  | ||||
|   // Control flow stack handling | ||||
|   bool isTopOfStack(BasicBlock *BB); | ||||
|   Value *popSaved(); | ||||
|   void push(BasicBlock *BB, Value *Saved); | ||||
|  | ||||
|   // PHI node helpers | ||||
|   bool isElse(PHINode *Phi); | ||||
|   void eraseIfUnused(PHINode *Phi); | ||||
|  | ||||
|   // Annotation of the individual control flow constructs | ||||
|   void openIf(BranchInst *Term); | ||||
|   void insertElse(BranchInst *Term); | ||||
|   void handleLoopCondition(Value *Cond); | ||||
|   void handleLoop(BranchInst *Term); | ||||
|   void closeControlFlow(BasicBlock *BB); | ||||
|  | ||||
| public: | ||||
|   SIAnnotateControlFlow() : FunctionPass(ID) {} | ||||
|  | ||||
|   virtual bool doInitialization(Module &M); | ||||
|   virtual bool runOnFunction(Function &F); | ||||
|  | ||||
|   virtual const char *getPassName() const { | ||||
|     return "SI annotate control flow"; | ||||
|   } | ||||
|  | ||||
|   /// The dominator tree is both required and declared as preserved. | ||||
|   virtual void getAnalysisUsage(AnalysisUsage &AU) const { | ||||
|     AU.addRequired<DominatorTree>(); | ||||
|     AU.addPreserved<DominatorTree>(); | ||||
|     FunctionPass::getAnalysisUsage(AU); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // end anonymous namespace | ||||
|  | ||||
| char SIAnnotateControlFlow::ID = 0; | ||||
|  | ||||
| /// \brief Set up the types, constants and intrinsic declarations the pass | ||||
| /// works with. Always returns false. | ||||
| bool SIAnnotateControlFlow::doInitialization(Module &M) { | ||||
|   LLVMContext &Ctx = M.getContext(); | ||||
|  | ||||
|   // Basic types plus the { i1, i64 } struct returned by "if" and "else" | ||||
|   Void = Type::getVoidTy(Ctx); | ||||
|   Boolean = Type::getInt1Ty(Ctx); | ||||
|   Int64 = Type::getInt64Ty(Ctx); | ||||
|   ReturnStruct = StructType::get(Boolean, Int64, (Type *)0); | ||||
|  | ||||
|   // Constants | ||||
|   BoolTrue = ConstantInt::getTrue(Ctx); | ||||
|   BoolFalse = ConstantInt::getFalse(Ctx); | ||||
|   BoolUndef = UndefValue::get(Boolean); | ||||
|   Int64Zero = ConstantInt::get(Int64, 0); | ||||
|  | ||||
|   // Intrinsic declarations: name, return type, then the parameter types | ||||
|   If = M.getOrInsertFunction(IfIntrinsic, ReturnStruct, Boolean, (Type *)0); | ||||
|   Else = M.getOrInsertFunction(ElseIntrinsic, ReturnStruct, Int64, (Type *)0); | ||||
|   Break = M.getOrInsertFunction(BreakIntrinsic, Int64, Int64, (Type *)0); | ||||
|   IfBreak = M.getOrInsertFunction(IfBreakIntrinsic, Int64, Boolean, Int64, | ||||
|                                   (Type *)0); | ||||
|   ElseBreak = M.getOrInsertFunction(ElseBreakIntrinsic, Int64, Int64, Int64, | ||||
|                                     (Type *)0); | ||||
|   Loop = M.getOrInsertFunction(LoopIntrinsic, Boolean, Int64, (Type *)0); | ||||
|   EndCf = M.getOrInsertFunction(EndCfIntrinsic, Void, Int64, (Type *)0); | ||||
|  | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| /// \brief Is BB the last block saved on the stack ? | ||||
| /// | ||||
| /// Returns false while the stack is empty, so the query never reads the back | ||||
| /// of an empty vector (e.g. for the first block visited in a function). | ||||
| bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) { | ||||
|   return !Stack.empty() && Stack.back().first == BB; | ||||
| } | ||||
|  | ||||
| /// \brief Remove the top entry of the control flow stack and return the | ||||
| /// value that was saved with it | ||||
| Value *SIAnnotateControlFlow::popSaved() { | ||||
|   StackEntry Top = Stack.pop_back_val(); | ||||
|   return Top.second; | ||||
| } | ||||
|  | ||||
| /// \brief Put a new (block, saved value) entry on top of the control flow | ||||
| /// stack | ||||
| void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) { | ||||
|   StackEntry Entry(BB, Saved); | ||||
|   Stack.push_back(Entry); | ||||
| } | ||||
|  | ||||
| /// \brief Can the condition represented by this PHI node be treated like | ||||
| /// an "Else" block? | ||||
| /// | ||||
| /// That is the case when the value coming from the immediate dominator of | ||||
| /// the PHI's block is "true" and every other incoming value is "false". | ||||
| bool SIAnnotateControlFlow::isElse(PHINode *Phi) { | ||||
|  | ||||
|   BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock(); | ||||
|   for (unsigned Idx = 0, End = Phi->getNumIncomingValues(); Idx != End; | ||||
|        ++Idx) { | ||||
|  | ||||
|     ConstantInt *Expected = BoolFalse; | ||||
|     if (Phi->getIncomingBlock(Idx) == IDom) | ||||
|       Expected = BoolTrue; | ||||
|  | ||||
|     if (Phi->getIncomingValue(Idx) != Expected) | ||||
|       return false; | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| /// \brief Remove "Phi" from its block once nothing uses it any more | ||||
| void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { | ||||
|   if (Phi->use_empty()) | ||||
|     Phi->eraseFromParent(); | ||||
| } | ||||
|  | ||||
| /// \brief Start a new "If" block at the conditional branch "Term" | ||||
| void SIAnnotateControlFlow::openIf(BranchInst *Term) { | ||||
|   // Feed the old branch condition into the "if" intrinsic | ||||
|   Value *IfCall = CallInst::Create(If, Term->getCondition(), "", Term); | ||||
|  | ||||
|   // Struct member 0 becomes the new branch condition | ||||
|   Value *NewCond = ExtractValueInst::Create(IfCall, 0, "", Term); | ||||
|   Term->setCondition(NewCond); | ||||
|  | ||||
|   // Struct member 1 is saved on the stack together with successor 1 | ||||
|   Value *Saved = ExtractValueInst::Create(IfCall, 1, "", Term); | ||||
|   push(Term->getSuccessor(1), Saved); | ||||
| } | ||||
|  | ||||
| /// \brief End the last "If" block and start an "Else" block in its place | ||||
| void SIAnnotateControlFlow::insertElse(BranchInst *Term) { | ||||
|   // The value saved for the "If" block is the argument of the "else" call | ||||
|   Value *OldSaved = popSaved(); | ||||
|   Value *ElseCall = CallInst::Create(Else, OldSaved, "", Term); | ||||
|  | ||||
|   // Struct member 0 becomes the new branch condition | ||||
|   Value *NewCond = ExtractValueInst::Create(ElseCall, 0, "", Term); | ||||
|   Term->setCondition(NewCond); | ||||
|  | ||||
|   // Struct member 1 is saved on the stack together with successor 1 | ||||
|   Value *Saved = ExtractValueInst::Create(ElseCall, 1, "", Term); | ||||
|   push(Term->getSuccessor(1), Saved); | ||||
| } | ||||
|  | ||||
| /// \brief Recursively turn the condition leading to a loop into calls of the | ||||
| /// break intrinsics | ||||
| /// | ||||
| /// PHI nodes are walked through their incoming values, any other instruction | ||||
| /// gets an "if.break" call at the end of its block. The results are recorded | ||||
| /// in PhiInserter. | ||||
| void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { | ||||
|  | ||||
|   PHINode *Phi = dyn_cast<PHINode>(Cond); | ||||
|   if (!Phi) { | ||||
|     Instruction *Inst = dyn_cast<Instruction>(Cond); | ||||
|     if (!Inst) { | ||||
|       assert(0 && "Unhandled loop condition!"); | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     // Conditional break in front of the terminator of the defining block | ||||
|     BasicBlock *Block = Inst->getParent(); | ||||
|     TerminatorInst *InsertPt = Block->getTerminator(); | ||||
|     Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Block) }; | ||||
|     Value *Result = CallInst::Create(IfBreak, Args, "", InsertPt); | ||||
|     PhiInserter.AddAvailableValue(Block, Result); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   // Handle all non constant incoming values first, each one is replaced by | ||||
|   // "false" in the PHI node before it is processed | ||||
|   for (unsigned Idx = 0, End = Phi->getNumIncomingValues(); Idx != End; | ||||
|        ++Idx) { | ||||
|     Value *In = Phi->getIncomingValue(Idx); | ||||
|     if (!isa<ConstantInt>(In)) { | ||||
|       Phi->setIncomingValue(Idx, BoolFalse); | ||||
|       handleLoopCondition(In); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   BasicBlock *PhiBlock = Phi->getParent(); | ||||
|   BasicBlock *IDom = DT->getNode(PhiBlock)->getIDom()->getBlock(); | ||||
|  | ||||
|   // Now every incoming "true" needs a break | ||||
|   for (unsigned Idx = 0, End = Phi->getNumIncomingValues(); Idx != End; | ||||
|        ++Idx) { | ||||
|  | ||||
|     if (Phi->getIncomingValue(Idx) != BoolTrue) | ||||
|       continue; | ||||
|  | ||||
|     BasicBlock *From = Phi->getIncomingBlock(Idx); | ||||
|     if (From == IDom) { | ||||
|       // If the PHI's block starts with an "end.cf" call, put an "else.break" | ||||
|       // in front of it instead of breaking in the predecessor | ||||
|       CallInst *OldEnd = dyn_cast<CallInst>(PhiBlock->getFirstInsertionPt()); | ||||
|       if (OldEnd && OldEnd->getCalledFunction() == EndCf) { | ||||
|         Value *Args[] = { | ||||
|           OldEnd->getArgOperand(0), | ||||
|           PhiInserter.GetValueAtEndOfBlock(PhiBlock) | ||||
|         }; | ||||
|         Value *Result = CallInst::Create(ElseBreak, Args, "", OldEnd); | ||||
|         PhiInserter.AddAvailableValue(PhiBlock, Result); | ||||
|         continue; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Unconditional break in front of the terminator of the incoming block | ||||
|     TerminatorInst *InsertPt = From->getTerminator(); | ||||
|     Value *Arg = PhiInserter.GetValueAtEndOfBlock(From); | ||||
|     Value *Result = CallInst::Create(Break, Arg, "", InsertPt); | ||||
|     PhiInserter.AddAvailableValue(From, Result); | ||||
|   } | ||||
|   eraseIfUnused(Phi); | ||||
| } | ||||
|  | ||||
| /// \brief Annotate a back edge (loop) ending in the branch "Term" | ||||
| void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { | ||||
|  | ||||
|   // Successor 1 is the block the back edge jumps to, it gets a new i64 PHI | ||||
|   // node as its first instruction | ||||
|   BasicBlock *LoopTarget = Term->getSuccessor(1); | ||||
|   PHINode *Broken = PHINode::Create(Int64, 0, "", &LoopTarget->front()); | ||||
|  | ||||
|   PhiInserter.Initialize(Int64, ""); | ||||
|   PhiInserter.AddAvailableValue(LoopTarget, Broken); | ||||
|  | ||||
|   // Replace the branch condition with "true" for now and translate the old | ||||
|   // condition into break intrinsics | ||||
|   Value *OldCond = Term->getCondition(); | ||||
|   Term->setCondition(BoolTrue); | ||||
|   handleLoopCondition(OldCond); | ||||
|  | ||||
|   // The PHI node gets the value at the end of this block for the back edge | ||||
|   // and zero for every other predecessor | ||||
|   BasicBlock *Latch = Term->getParent(); | ||||
|   Value *Arg = PhiInserter.GetValueAtEndOfBlock(Latch); | ||||
|   for (pred_iterator PI = pred_begin(LoopTarget), PE = pred_end(LoopTarget); | ||||
|        PI != PE; ++PI) { | ||||
|  | ||||
|     Value *Incoming = Int64Zero; | ||||
|     if (*PI == Latch) | ||||
|       Incoming = Arg; | ||||
|     Broken->addIncoming(Incoming, *PI); | ||||
|   } | ||||
|  | ||||
|   Value *LoopCall = CallInst::Create(Loop, Arg, "", Term); | ||||
|   Term->setCondition(LoopCall); | ||||
|   push(Term->getSuccessor(0), Arg); | ||||
| } | ||||
|  | ||||
| /// \brief End the control flow opened last by inserting an "end.cf" call | ||||
| /// with the saved value at the first insertion point of "BB" | ||||
| void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { | ||||
|   Value *Saved = popSaved(); | ||||
|   CallInst::Create(EndCf, Saved, "", BB->getFirstInsertionPt()); | ||||
| } | ||||
|  | ||||
| /// \brief Annotate the control flow with intrinsics so the backend can | ||||
| /// recognize if/then/else and loops. | ||||
| /// | ||||
| /// The blocks are visited depth first starting at the entry block. Always | ||||
| /// returns true. | ||||
| bool SIAnnotateControlFlow::runOnFunction(Function &F) { | ||||
|  | ||||
|   DT = &getAnalysis<DominatorTree>(); | ||||
|  | ||||
|   BasicBlock *Entry = &F.getEntryBlock(); | ||||
|   for (df_iterator<BasicBlock *> I = df_begin(Entry), E = df_end(Entry); | ||||
|        I != E; ++I) { | ||||
|  | ||||
|     BasicBlock *BB = *I; | ||||
|     BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); | ||||
|     const bool OnTop = isTopOfStack(BB); | ||||
|  | ||||
|     // Without a conditional branch only pending control flow is closed | ||||
|     if (!Term || Term->isUnconditional()) { | ||||
|       if (OnTop) | ||||
|         closeControlFlow(BB); | ||||
|       continue; | ||||
|     } | ||||
|  | ||||
|     // Successor 1 was already visited, treat the branch as a loop | ||||
|     if (I.nodeVisited(Term->getSuccessor(1))) { | ||||
|       if (OnTop) | ||||
|         closeControlFlow(BB); | ||||
|       handleLoop(Term); | ||||
|       continue; | ||||
|     } | ||||
|  | ||||
|     if (OnTop) { | ||||
|       // A matching PHI condition in this block turns the "If" into an "Else" | ||||
|       PHINode *Phi = dyn_cast<PHINode>(Term->getCondition()); | ||||
|       if (Phi && Phi->getParent() == BB && isElse(Phi)) { | ||||
|         insertElse(Term); | ||||
|         eraseIfUnused(Phi); | ||||
|         continue; | ||||
|       } | ||||
|       closeControlFlow(BB); | ||||
|     } | ||||
|     openIf(Term); | ||||
|   } | ||||
|  | ||||
|   assert(Stack.empty()); | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| /// \brief Allocate a new instance of the control flow annotation pass | ||||
| FunctionPass *llvm::createSIAnnotateControlFlowPass() { | ||||
|   FunctionPass *AnnotatePass = new SIAnnotateControlFlow(); | ||||
|   return AnnotatePass; | ||||
| } | ||||
| @@ -1,179 +0,0 @@ | ||||
| //===-- SIFixSGPRLiveness.cpp - SGPR liveness adjustment ------------------===// | ||||
| // | ||||
| //                     The LLVM Compiler Infrastructure | ||||
| // | ||||
| // This file is distributed under the University of Illinois Open Source | ||||
| // License. See LICENSE.TXT for details. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
| // | ||||
| /// \file | ||||
| /// | ||||
| /// SGPRs are not affected by control flow. This pass adjusts SGPR liveness so | ||||
| /// that the register allocator can still correctly allocate them. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| #include "AMDGPU.h" | ||||
| #include "llvm/CodeGen/MachineFunctionPass.h" | ||||
| #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||
| #include "llvm/CodeGen/MachineDominators.h" | ||||
| #include "llvm/CodeGen/MachinePostDominators.h" | ||||
| #include "llvm/CodeGen/MachineInstrBuilder.h" | ||||
|  | ||||
| using namespace llvm; | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| /// Machine function pass that adds KILL instructions for virtual registers | ||||
| /// of the SGPR register classes. | ||||
| class SIFixSGPRLiveness : public MachineFunctionPass { | ||||
| private: | ||||
|   static char ID; | ||||
|  | ||||
|   const TargetInstrInfo *TII; | ||||
|   MachineRegisterInfo *MRI; | ||||
|   MachineDominatorTree *MD; | ||||
|   MachinePostDominatorTree *MPD; | ||||
|  | ||||
|   /// Is \p RegClass one of the SReg_* classes this pass looks at? | ||||
|   bool isSGPR(const TargetRegisterClass *RegClass) { | ||||
|     if (RegClass == &AMDGPU::SReg_1RegClass) | ||||
|       return true; | ||||
|     if (RegClass == &AMDGPU::SReg_32RegClass) | ||||
|       return true; | ||||
|     if (RegClass == &AMDGPU::SReg_64RegClass) | ||||
|       return true; | ||||
|     if (RegClass == &AMDGPU::SReg_128RegClass) | ||||
|       return true; | ||||
|     return RegClass == &AMDGPU::SReg_256RegClass; | ||||
|   } | ||||
|  | ||||
|   void addKill(MachineBasicBlock::iterator I, unsigned Reg); | ||||
|   MachineBasicBlock *handleUses(unsigned VirtReg, MachineBasicBlock *Begin); | ||||
|   void handlePreds(MachineBasicBlock *Begin, MachineBasicBlock *End, | ||||
|                    unsigned VirtReg); | ||||
|   bool handleVirtReg(unsigned VirtReg); | ||||
|  | ||||
| public: | ||||
|   SIFixSGPRLiveness(TargetMachine &tm); | ||||
|  | ||||
|   virtual bool runOnMachineFunction(MachineFunction &MF); | ||||
|   virtual void getAnalysisUsage(AnalysisUsage &AU) const; | ||||
|  | ||||
|   virtual const char *getPassName() const { | ||||
|     return "SI fix SGPR liveness pass"; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // end anonymous namespace | ||||
|  | ||||
| char SIFixSGPRLiveness::ID = 0; | ||||
|  | ||||
| /// Remember the instruction info of \p tm and make sure the live intervals | ||||
| /// pass is initialized in the pass registry. | ||||
| SIFixSGPRLiveness::SIFixSGPRLiveness(TargetMachine &tm) | ||||
|   : MachineFunctionPass(ID), TII(tm.getInstrInfo()) { | ||||
|   PassRegistry &Registry = *PassRegistry::getPassRegistry(); | ||||
|   initializeLiveIntervalsPass(Registry); | ||||
| } | ||||
|  | ||||
| /// Requires the machine dominator and post dominator trees and declares the | ||||
| /// CFG as preserved. | ||||
| void SIFixSGPRLiveness::getAnalysisUsage(AnalysisUsage &AU) const { | ||||
|   // Analyses queried in runOnMachineFunction() | ||||
|   AU.addRequired<MachineDominatorTree>(); | ||||
|   AU.addRequired<MachinePostDominatorTree>(); | ||||
|  | ||||
|   AU.setPreservesCFG(); | ||||
|  | ||||
|   MachineFunctionPass::getAnalysisUsage(AU); | ||||
| } | ||||
|  | ||||
| /// Insert a KILL instruction using \p Reg in front of \p I. | ||||
| void SIFixSGPRLiveness::addKill(MachineBasicBlock::iterator I, unsigned Reg) { | ||||
|   MachineBasicBlock &Block = *I->getParent(); | ||||
|   const MCInstrDesc &KillDesc = TII->get(TargetOpcode::KILL); | ||||
|  | ||||
|   BuildMI(Block, I, DebugLoc(), KillDesc).addReg(Reg); | ||||
| } | ||||
|  | ||||
| // Find the common post dominator of all uses. Returns Begin instead when the | ||||
| // block found that way is itself one of the using blocks. | ||||
| MachineBasicBlock *SIFixSGPRLiveness::handleUses(unsigned VirtReg, | ||||
|                                                  MachineBasicBlock *Begin) { | ||||
|   MachineBasicBlock *LastUse = Begin; | ||||
|   MachineBasicBlock *End = Begin; | ||||
|   bool EndUsesReg = true; | ||||
|  | ||||
|   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(VirtReg), | ||||
|        UE = MRI->use_end(); UI != UE; ++UI) { | ||||
|  | ||||
|     MachineBasicBlock *UseBB = UI->getParent(); | ||||
|     if (UseBB == LastUse) | ||||
|       continue; // same block as the previous use | ||||
|  | ||||
|     LastUse = UseBB; | ||||
|     MachineBasicBlock *Common = MPD->findNearestCommonDominator(End, UseBB); | ||||
|  | ||||
|     if (Common == UseBB) | ||||
|       EndUsesReg = true; | ||||
|     else if (Common != End) | ||||
|       EndUsesReg = false; | ||||
|  | ||||
|     End = Common; | ||||
|   } | ||||
|  | ||||
|   if (EndUsesReg) | ||||
|     return Begin; | ||||
|   return End; | ||||
| } | ||||
|  | ||||
| // Walk the predecessors of End and add KILLs only to those dominated by | ||||
| // Begin, recursing into predecessors that are neither skipped nor dominated | ||||
| void SIFixSGPRLiveness::handlePreds(MachineBasicBlock *Begin, | ||||
|                                     MachineBasicBlock *End, | ||||
|                                     unsigned VirtReg) { | ||||
|   for (MachineBasicBlock::pred_iterator PI = End->pred_begin(), | ||||
|        PE = End->pred_end(); PI != PE; ++PI) { | ||||
|  | ||||
|     MachineBasicBlock *Pred = *PI; | ||||
|  | ||||
|     // Ignore loops (End dominates the predecessor) and stop searching when | ||||
|     // the predecessor dominates Begin (too far up) | ||||
|     if (MD->dominates(End, Pred) || MD->dominates(Pred, Begin)) | ||||
|       continue; | ||||
|  | ||||
|     if (MD->dominates(Begin, Pred)) { | ||||
|       // found end of livetime | ||||
|       addKill(Pred->getFirstTerminator(), VirtReg); | ||||
|     } else { | ||||
|       handlePreds(Begin, Pred, VirtReg); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// Add the KILL instructions for one virtual register. Returns false when | ||||
| /// the register has no definition, no uses, or handleUses() hands back the | ||||
| /// defining block. | ||||
| bool SIFixSGPRLiveness::handleVirtReg(unsigned VirtReg) { | ||||
|  | ||||
|   MachineInstr *DefMI = MRI->getVRegDef(VirtReg); | ||||
|   if (!DefMI || MRI->use_empty(VirtReg)) | ||||
|     return false; // No definition or not used | ||||
|  | ||||
|   MachineBasicBlock *DefBB = DefMI->getParent(); | ||||
|   MachineBasicBlock *EndBB = handleUses(VirtReg, DefBB); | ||||
|   if (DefBB == EndBB) | ||||
|     return false; // Nothing to do beyond the defining block | ||||
|  | ||||
|   if (!MD->dominates(DefBB, EndBB)) { | ||||
|     // Only some predecessors are dominated, handle them separately | ||||
|     handlePreds(DefBB, EndBB, VirtReg); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   // The defining block dominates the end block, just kill the register there | ||||
|   addKill(EndBB->getFirstNonPHI(), VirtReg); | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| /// Run handleVirtReg() on every virtual register with an SGPR register | ||||
| /// class. Returns true if any of those calls returned true. | ||||
| bool SIFixSGPRLiveness::runOnMachineFunction(MachineFunction &MF) { | ||||
|   MRI = &MF.getRegInfo(); | ||||
|   MD = &getAnalysis<MachineDominatorTree>(); | ||||
|   MPD = &getAnalysis<MachinePostDominatorTree>(); | ||||
|  | ||||
|   bool Changed = false; | ||||
|   for (unsigned Idx = 0, NumRegs = MRI->getNumVirtRegs(); Idx != NumRegs; | ||||
|        ++Idx) { | ||||
|     unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); | ||||
|  | ||||
|     if (!isSGPR(MRI->getRegClass(VirtReg))) | ||||
|       continue; | ||||
|  | ||||
|     if (handleVirtReg(VirtReg)) | ||||
|       Changed = true; | ||||
|   } | ||||
|  | ||||
|   return Changed; | ||||
| } | ||||
|  | ||||
| /// Allocate a new instance of the SGPR liveness fixing pass for \p tm. | ||||
| FunctionPass *llvm::createSIFixSGPRLivenessPass(TargetMachine &tm) { | ||||
|   FunctionPass *LivenessPass = new SIFixSGPRLiveness(tm); | ||||
|   return LivenessPass; | ||||
| } | ||||
| @@ -44,8 +44,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : | ||||
|   setOperationAction(ISD::ADD, MVT::i64, Legal); | ||||
|   setOperationAction(ISD::ADD, MVT::i32, Legal); | ||||
|  | ||||
|   setOperationAction(ISD::BR_CC, MVT::i32, Custom); | ||||
|  | ||||
|   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); | ||||
|  | ||||
|   // We need to custom lower loads from the USER_SGPR address space, so we can | ||||
| @@ -254,7 +252,7 @@ EVT SITargetLowering::getSetCCResultType(EVT VT) const { | ||||
| SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | ||||
|   switch (Op.getOpcode()) { | ||||
|   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); | ||||
|   case ISD::BR_CC: return LowerBR_CC(Op, DAG); | ||||
|   case ISD::BRCOND: return LowerBRCOND(Op, DAG); | ||||
|   case ISD::LOAD: return LowerLOAD(Op, DAG); | ||||
|   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); | ||||
|   case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); | ||||
| @@ -298,27 +296,99 @@ SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, | ||||
|   return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); | ||||
| } | ||||
|  | ||||
| SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { | ||||
|   SDValue Chain = Op.getOperand(0); | ||||
|   SDValue CC = Op.getOperand(1); | ||||
|   SDValue LHS   = Op.getOperand(2); | ||||
|   SDValue RHS   = Op.getOperand(3); | ||||
|   SDValue JumpT  = Op.getOperand(4); | ||||
|   SDValue CmpValue; | ||||
|   SDValue Result; | ||||
|   CmpValue = DAG.getNode( | ||||
|       ISD::SETCC, | ||||
|       Op.getDebugLoc(), | ||||
|       MVT::i1, | ||||
|       LHS, RHS, | ||||
|       CC); | ||||
| /// \brief Helper function for LowerBRCOND | ||||
| static SDNode *findUser(SDValue Value, unsigned Opcode) { | ||||
|  | ||||
|   Result = DAG.getNode( | ||||
|       AMDGPUISD::BRANCH_COND, | ||||
|       CmpValue.getDebugLoc(), | ||||
|       MVT::Other, Chain, | ||||
|       JumpT, CmpValue); | ||||
|   return Result; | ||||
|   SDNode *Parent = Value.getNode(); | ||||
|   for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end(); | ||||
|        I != E; ++I) { | ||||
|  | ||||
|     if (I.getUse().get() != Value) | ||||
|       continue; | ||||
|  | ||||
|     if (I->getOpcode() == Opcode) | ||||
|       return *I; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| /// \brief Custom lowering for a BRCOND whose condition comes from one of the | ||||
| /// control flow intrinsics. | ||||
| /// | ||||
| /// The intrinsic call is rebuilt so that it receives the branch destination as | ||||
| /// its last operand. If the condition is not negated, the branch target is | ||||
| /// swapped with the one of the BR node that uses the BRCOND. | ||||
| /// | ||||
| /// \param BRCOND the node to lower; operand 1 is the condition and operand 2 | ||||
| ///        the branch target. | ||||
| /// \param DAG    the DAG in which the replacement nodes are created. | ||||
| /// \returns the chain produced by the rebuilt intrinsic (and by the CopyToReg | ||||
| ///          nodes that were re-created for its results). | ||||
| SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, | ||||
|                                       SelectionDAG &DAG) const { | ||||
|  | ||||
|   DebugLoc DL = BRCOND.getDebugLoc(); | ||||
|  | ||||
|   SDNode *Intr = BRCOND.getOperand(1).getNode(); | ||||
|   SDValue Target = BRCOND.getOperand(2); | ||||
|   SDNode *BR = 0; | ||||
|  | ||||
|   if (Intr->getOpcode() == ISD::SETCC) { | ||||
|     // The condition is negated (setcc <intrinsic>, 1, setne), so the BRCOND | ||||
|     // target can be used as is; just look through the SETCC. | ||||
|     SDNode *SetCC = Intr; | ||||
|     assert(SetCC->getConstantOperandVal(1) == 1); | ||||
|  | ||||
|     CondCodeSDNode *CC = cast<CondCodeSDNode>(SetCC->getOperand(2).getNode()); | ||||
|     assert(CC->get() == ISD::SETNE); | ||||
|     Intr = SetCC->getOperand(0).getNode(); | ||||
|  | ||||
|   } else { | ||||
|     // Get the target from BR if we don't negate the condition | ||||
|     BR = findUser(BRCOND, ISD::BR); | ||||
|     // findUser returns 0 when no matching user exists; fail loudly instead | ||||
|     // of dereferencing a null node. | ||||
|     assert(BR && "BRCOND without negated condition must be used by a BR"); | ||||
|     Target = BR->getOperand(1); | ||||
|   } | ||||
|  | ||||
|   assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN); | ||||
|  | ||||
|   // Result types of the new intrinsic call: all values of the old one except | ||||
|   // the first. | ||||
|   SmallVector<EVT, 4> Res; | ||||
|   for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i) | ||||
|     Res.push_back(Intr->getValueType(i)); | ||||
|  | ||||
|   // Operands of the new intrinsic call: the chain of the BRCOND, the old | ||||
|   // operands (without the old chain) and finally the branch target. | ||||
|   SmallVector<SDValue, 4> Ops; | ||||
|   Ops.push_back(BRCOND.getOperand(0)); | ||||
|   for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i) | ||||
|     Ops.push_back(Intr->getOperand(i)); | ||||
|   Ops.push_back(Target); | ||||
|  | ||||
|   // Build the new intrinsic call | ||||
|   SDNode *Result = DAG.getNode( | ||||
|     Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL, | ||||
|     DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode(); | ||||
|  | ||||
|   if (BR) { | ||||
|     // Give the branch instruction our target | ||||
|     SDValue BROps[] = { | ||||
|       BR->getOperand(0), | ||||
|       BRCOND.getOperand(2) | ||||
|     }; | ||||
|     DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), BROps, 2); | ||||
|   } | ||||
|  | ||||
|   // The chain is always the last value of the new node | ||||
|   SDValue Chain = SDValue(Result, Result->getNumValues() - 1); | ||||
|  | ||||
|   // Copy the intrinsic results to registers. Value i of the old intrinsic | ||||
|   // corresponds to value i - 1 of the new one. | ||||
|   for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) { | ||||
|     SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg); | ||||
|     if (!CopyToReg) | ||||
|       continue; | ||||
|  | ||||
|     Chain = DAG.getCopyToReg( | ||||
|       Chain, DL, | ||||
|       CopyToReg->getOperand(1), | ||||
|       SDValue(Result, i - 1), | ||||
|       SDValue()); | ||||
|  | ||||
|     // Unlink the old copy by forwarding its chain input to its users | ||||
|     DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0)); | ||||
|   } | ||||
|  | ||||
|   // Remove the old intrinsic from the chain | ||||
|   DAG.ReplaceAllUsesOfValueWith( | ||||
|     SDValue(Intr, Intr->getNumValues() - 1), | ||||
|     Intr->getOperand(0)); | ||||
|  | ||||
|   return Chain; | ||||
| } | ||||
|  | ||||
| SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { | ||||
|   | ||||
| @@ -43,9 +43,9 @@ class SITargetLowering : public AMDGPUTargetLowering { | ||||
|  | ||||
|   SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG, | ||||
|                                            unsigned VCCNode) const; | ||||
|   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; | ||||
|   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; | ||||
|   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; | ||||
|   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; | ||||
|  | ||||
| public: | ||||
|   SITargetLowering(TargetMachine &tm); | ||||
|   | ||||
| @@ -696,8 +696,9 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", | ||||
| let isBranch = 1 in { | ||||
| def S_BRANCH : SOPP < | ||||
|   0x00000002, (ins brtarget:$target), "S_BRANCH", | ||||
|   [] | ||||
| >; | ||||
|   [(br bb:$target)]> { | ||||
|   let isBarrier = 1; | ||||
| } | ||||
|  | ||||
| let DisableEncoding = "$scc" in { | ||||
| def S_CBRANCH_SCC0 : SOPP < | ||||
| @@ -1095,26 +1096,70 @@ def SI_WQM : InstSI < | ||||
|  | ||||
| } // end usesCustomInserter  | ||||
|  | ||||
| // SI Pseudo branch instructions.  These are used by the CFG structurizer pass | ||||
| // SI Pseudo instructions. These are used by the CFG structurizer pass | ||||
| // and should be lowered to ISA instructions prior to codegen. | ||||
|  | ||||
| let isBranch = 1, isTerminator = 1, mayLoad = 0, mayStore = 0, | ||||
|                                                  hasSideEffects = 0 in { | ||||
| def SI_IF_NZ : InstSI < | ||||
|   (outs), | ||||
|   (ins brtarget:$target, SReg_1:$vcc), | ||||
|   "SI_BRANCH_NZ", | ||||
|   [(IL_brcond bb:$target, SReg_1:$vcc)] | ||||
| let mayLoad = 1, mayStore = 1, hasSideEffects = 1, | ||||
|     Uses = [EXEC], Defs = [EXEC] in { | ||||
|  | ||||
| let isBranch = 1, isTerminator = 1 in { | ||||
|  | ||||
| // Pseudo instruction selected for int_SI_if. SILowerControlFlowPass::If() | ||||
| // expands it to S_AND_SAVEEXEC_B64 followed by S_XOR_B64, both writing $dst. | ||||
| def SI_IF : InstSI < | ||||
|   (outs SReg_64:$dst), | ||||
|   (ins SReg_1:$vcc, brtarget:$target), | ||||
|   "SI_IF", | ||||
|   [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))] | ||||
| >; | ||||
|  | ||||
| def SI_IF_Z : InstSI < | ||||
| // Pseudo instruction selected for int_SI_else. SILowerControlFlowPass::Else() | ||||
| // expands it to S_OR_SAVEEXEC_B64 on $src followed by an S_XOR_B64 on EXEC. | ||||
| def SI_ELSE : InstSI < | ||||
|   (outs SReg_64:$dst), | ||||
|   (ins SReg_64:$src, brtarget:$target), | ||||
|   "SI_ELSE", | ||||
|   [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> { | ||||
|  | ||||
|   // The saved mask is updated in place: input and output share a register. | ||||
|   let Constraints = "$src = $dst"; | ||||
| } | ||||
|  | ||||
| def SI_LOOP : InstSI < | ||||
|   (outs), | ||||
|   (ins brtarget:$target, SReg_1:$vcc), | ||||
|   "SI_BRANCH_Z", | ||||
|   [] | ||||
|   (ins SReg_64:$saved, brtarget:$target), | ||||
|   "SI_LOOP", | ||||
|   [(int_SI_loop SReg_64:$saved, bb:$target)] | ||||
| >; | ||||
| } // end isBranch = 1, isTerminator = 1, mayLoad = 0, mayStore = 0, | ||||
|   //     hasSideEffects = 0 | ||||
|  | ||||
| } // end isBranch = 1, isTerminator = 1 | ||||
|  | ||||
| // Pseudo instruction selected for int_SI_break. SILowerControlFlowPass::Break() | ||||
| // expands it to $dst = S_OR_B64 EXEC, $src. | ||||
| def SI_BREAK : InstSI < | ||||
|   (outs SReg_64:$dst), | ||||
|   (ins SReg_64:$src), | ||||
|   "SI_BREAK", | ||||
|   [(set SReg_64:$dst, (int_SI_break SReg_64:$src))] | ||||
| >; | ||||
|  | ||||
| // Pseudo instruction selected for int_SI_if_break. | ||||
| // SILowerControlFlowPass::IfBreak() expands it to $dst = S_OR_B64 $vcc, $src. | ||||
| def SI_IF_BREAK : InstSI < | ||||
|   (outs SReg_64:$dst), | ||||
|   (ins SReg_1:$vcc, SReg_64:$src), | ||||
|   "SI_IF_BREAK", | ||||
|   [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))] | ||||
| >; | ||||
|  | ||||
| // Pseudo instruction selected for int_SI_else_break. | ||||
| // SILowerControlFlowPass::ElseBreak() expands it to | ||||
| // $dst = S_OR_B64 $src0, $src1. | ||||
| def SI_ELSE_BREAK : InstSI < | ||||
|   (outs SReg_64:$dst), | ||||
|   (ins SReg_64:$src0, SReg_64:$src1), | ||||
|   "SI_ELSE_BREAK", | ||||
|   [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))] | ||||
| >; | ||||
|  | ||||
| // Pseudo instruction selected for int_SI_end_cf. | ||||
| // SILowerControlFlowPass::EndCf() expands it to EXEC = S_OR_B64 EXEC, $saved, | ||||
| // inserted at the first non-PHI position of the containing block. | ||||
| def SI_END_CF : InstSI < | ||||
|   (outs), | ||||
|   (ins SReg_64:$saved), | ||||
|   "SI_END_CF", | ||||
|   [(int_SI_end_cf SReg_64:$saved)] | ||||
| >; | ||||
|  | ||||
| } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 | ||||
|   // Uses = [EXEC], Defs = [EXEC] | ||||
|  | ||||
| } // end IsCodeGenOnly, isPseudo | ||||
|  | ||||
| /* int_SI_vs_load_input */ | ||||
|   | ||||
| @@ -39,4 +39,14 @@ let TargetPrefix = "SI", isTarget = 1 in { | ||||
|  | ||||
|   def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>; | ||||
|   def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; | ||||
|  | ||||
|   /* Control flow Intrinsics */ | ||||
|  | ||||
|   // The trailing llvm_empty_ty operand of if/else/loop is matched as | ||||
|   // bb:$target by the SI_IF, SI_ELSE and SI_LOOP instruction patterns. | ||||
|   // None of these intrinsics carries properties (the last list is empty). | ||||
|   def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>; | ||||
|   def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>; | ||||
|   // The break variants take and produce only 64-bit mask values (plus an i1 | ||||
|   // condition for if_break); they have no branch target operand. | ||||
|   def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; | ||||
|   def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>; | ||||
|   def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>; | ||||
|   // loop and end_cf return no value. | ||||
|   def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>; | ||||
|   def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>; | ||||
| } | ||||
|   | ||||
| @@ -8,10 +8,10 @@ | ||||
| //===----------------------------------------------------------------------===// | ||||
| // | ||||
| /// \file | ||||
| /// \brief This pass lowers the pseudo control flow instructions (SI_IF_NZ, ELSE, ENDIF) | ||||
| /// to predicated instructions. | ||||
| /// \brief This pass lowers the pseudo control flow instructions to real | ||||
| /// machine instructions. | ||||
| /// | ||||
| /// All control flow (except loops) is handled using predicated instructions and | ||||
| /// All control flow is handled using predicated instructions and | ||||
| /// a predicate stack.  Each Scalar ALU controls the operations of 64 Vector | ||||
| /// ALUs.  The Scalar ALU can update the predicate for any of the Vector ALUs | ||||
| /// by writing to the 64-bit EXEC register (each bit corresponds to a | ||||
| @@ -22,17 +22,17 @@ | ||||
| /// | ||||
| /// For example: | ||||
| /// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2 | ||||
| /// SI_IF_NZ %VCC | ||||
| /// %SGPR0 = SI_IF %VCC | ||||
| ///   %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 | ||||
| /// ELSE | ||||
| /// %SGPR0 = SI_ELSE %SGPR0 | ||||
| ///   %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 | ||||
| /// ENDIF | ||||
| /// SI_END_CF %SGPR0 | ||||
| /// | ||||
| /// becomes: | ||||
| /// | ||||
| /// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC  // Save and update the exec mask | ||||
| /// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC  // Clear live bits from saved exec mask | ||||
| /// S_CBRANCH_EXECZ label0            // This instruction is an | ||||
| /// S_CBRANCH_EXECZ label0            // This instruction is an optional | ||||
| ///                                   // optimization which allows us to | ||||
| ///                                   // branch if all the bits of | ||||
| ///                                   // EXEC are zero. | ||||
| @@ -45,7 +45,7 @@ | ||||
| ///                                    // instruction again. | ||||
| /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR   // Do the THEN block | ||||
| /// label1: | ||||
| /// %EXEC = S_OR_B64 %EXEC, %SGPR2     // Re-enable saved exec mask bits | ||||
| /// %EXEC = S_OR_B64 %EXEC, %SGPR0     // Re-enable saved exec mask bits | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| #include "AMDGPU.h" | ||||
| @@ -65,11 +65,14 @@ class SILowerControlFlowPass : public MachineFunctionPass { | ||||
| private: | ||||
|   static char ID; | ||||
|   const TargetInstrInfo *TII; | ||||
|   std::vector<unsigned> PredicateStack; | ||||
|   std::vector<unsigned> UnusedRegisters; | ||||
|  | ||||
|   unsigned allocReg(); | ||||
|   void freeReg(unsigned Reg); | ||||
|   void If(MachineInstr &MI); | ||||
|   void Else(MachineInstr &MI); | ||||
|   void Break(MachineInstr &MI); | ||||
|   void IfBreak(MachineInstr &MI); | ||||
|   void ElseBreak(MachineInstr &MI); | ||||
|   void Loop(MachineInstr &MI); | ||||
|   void EndCf(MachineInstr &MI); | ||||
|  | ||||
| public: | ||||
|   SILowerControlFlowPass(TargetMachine &tm) : | ||||
| @@ -91,101 +94,199 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { | ||||
|   return new SILowerControlFlowPass(tm); | ||||
| } | ||||
|  | ||||
| /// \brief Lower the SI_IF pseudo instruction. | ||||
| /// | ||||
| /// Emits S_AND_SAVEEXEC_B64 on the condition followed by an S_XOR_B64 of EXEC | ||||
| /// with the result, both writing the destination register of the pseudo, and | ||||
| /// then removes the pseudo from its block. | ||||
| void SILowerControlFlowPass::If(MachineInstr &MI) { | ||||
|   MachineBasicBlock &Parent = *MI.getParent(); | ||||
|   DebugLoc Loc = MI.getDebugLoc(); | ||||
|   unsigned SavedExec = MI.getOperand(0).getReg(); | ||||
|   unsigned Cond = MI.getOperand(1).getReg(); | ||||
|  | ||||
|   // Save and update the exec mask | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), SavedExec) | ||||
|           .addReg(Cond); | ||||
|  | ||||
|   // Clear live bits from saved exec mask | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_XOR_B64), SavedExec) | ||||
|           .addReg(AMDGPU::EXEC) | ||||
|           .addReg(SavedExec); | ||||
|  | ||||
|   MI.eraseFromParent(); | ||||
| } | ||||
|  | ||||
| /// \brief Lower the SI_ELSE pseudo instruction. | ||||
| /// | ||||
| /// Emits S_OR_SAVEEXEC_B64 on the previously saved mask, then XORs EXEC with | ||||
| /// the value written by that instruction, and removes the pseudo. | ||||
| void SILowerControlFlowPass::Else(MachineInstr &MI) { | ||||
|   MachineBasicBlock &Parent = *MI.getParent(); | ||||
|   DebugLoc Loc = MI.getDebugLoc(); | ||||
|   unsigned NewSaved = MI.getOperand(0).getReg(); | ||||
|   unsigned OldSaved = MI.getOperand(1).getReg(); | ||||
|  | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), NewSaved) | ||||
|           .addReg(OldSaved); // Saved EXEC | ||||
|  | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) | ||||
|           .addReg(AMDGPU::EXEC) | ||||
|           .addReg(NewSaved); | ||||
|  | ||||
|   MI.eraseFromParent(); | ||||
| } | ||||
|  | ||||
| /// \brief Lower the SI_BREAK pseudo instruction. | ||||
| /// | ||||
| /// Replaces the pseudo with a single S_OR_B64 that writes EXEC OR-ed with the | ||||
| /// source operand into the destination register. | ||||
| void SILowerControlFlowPass::Break(MachineInstr &MI) { | ||||
|   MachineBasicBlock &Parent = *MI.getParent(); | ||||
|   DebugLoc Loc = MI.getDebugLoc(); | ||||
|   unsigned Result = MI.getOperand(0).getReg(); | ||||
|   unsigned Mask = MI.getOperand(1).getReg(); | ||||
|  | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_B64), Result) | ||||
|           .addReg(AMDGPU::EXEC) | ||||
|           .addReg(Mask); | ||||
|  | ||||
|   MI.eraseFromParent(); | ||||
| } | ||||
|  | ||||
| /// \brief Lower the SI_IF_BREAK pseudo instruction. | ||||
| /// | ||||
| /// Replaces the pseudo with a single S_OR_B64 that writes the condition OR-ed | ||||
| /// with the source operand into the destination register. | ||||
| void SILowerControlFlowPass::IfBreak(MachineInstr &MI) { | ||||
|   MachineBasicBlock &Parent = *MI.getParent(); | ||||
|   DebugLoc Loc = MI.getDebugLoc(); | ||||
|   unsigned Result = MI.getOperand(0).getReg(); | ||||
|   unsigned Cond = MI.getOperand(1).getReg(); | ||||
|   unsigned Mask = MI.getOperand(2).getReg(); | ||||
|  | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_B64), Result) | ||||
|           .addReg(Cond) | ||||
|           .addReg(Mask); | ||||
|  | ||||
|   MI.eraseFromParent(); | ||||
| } | ||||
|  | ||||
| /// \brief Lower the SI_ELSE_BREAK pseudo instruction. | ||||
| /// | ||||
| /// Replaces the pseudo with a single S_OR_B64 that writes the OR of its two | ||||
| /// source operands into the destination register. | ||||
| void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) { | ||||
|   MachineBasicBlock &Parent = *MI.getParent(); | ||||
|   DebugLoc Loc = MI.getDebugLoc(); | ||||
|   unsigned Result = MI.getOperand(0).getReg(); | ||||
|   unsigned First = MI.getOperand(1).getReg(); | ||||
|   unsigned Second = MI.getOperand(2).getReg(); | ||||
|  | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_B64), Result) | ||||
|           .addReg(First) | ||||
|           .addReg(Second); | ||||
|  | ||||
|   MI.eraseFromParent(); | ||||
| } | ||||
|  | ||||
| /// \brief Lower the SI_LOOP pseudo instruction. | ||||
| /// | ||||
| /// Emits S_ANDN2_B64 on EXEC with the source operand, followed by an | ||||
| /// S_CBRANCH_EXECNZ to the branch target of the pseudo, and removes the | ||||
| /// pseudo. | ||||
| void SILowerControlFlowPass::Loop(MachineInstr &MI) { | ||||
|   MachineBasicBlock &Parent = *MI.getParent(); | ||||
|   DebugLoc Loc = MI.getDebugLoc(); | ||||
|   unsigned Mask = MI.getOperand(0).getReg(); | ||||
|  | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC) | ||||
|           .addReg(AMDGPU::EXEC) | ||||
|           .addReg(Mask); | ||||
|  | ||||
|   // Operand 1 of the pseudo is the branch target | ||||
|   BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) | ||||
|           .addOperand(MI.getOperand(1)) | ||||
|           .addReg(AMDGPU::EXEC); | ||||
|  | ||||
|   MI.eraseFromParent(); | ||||
| } | ||||
|  | ||||
| /// \brief Lower the SI_END_CF pseudo instruction. | ||||
| /// | ||||
| /// ORs the saved mask back into EXEC. The S_OR_B64 is placed at the first | ||||
| /// non-PHI position of the block rather than at the position of the pseudo, | ||||
| /// which is removed afterwards. | ||||
| void SILowerControlFlowPass::EndCf(MachineInstr &MI) { | ||||
|   MachineBasicBlock &Parent = *MI.getParent(); | ||||
|   DebugLoc Loc = MI.getDebugLoc(); | ||||
|   unsigned Saved = MI.getOperand(0).getReg(); | ||||
|  | ||||
|   MachineBasicBlock::iterator InsertPt = Parent.getFirstNonPHI(); | ||||
|   BuildMI(Parent, InsertPt, Loc, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC) | ||||
|           .addReg(AMDGPU::EXEC) | ||||
|           .addReg(Saved); | ||||
|  | ||||
|   MI.eraseFromParent(); | ||||
| } | ||||
|  | ||||
| bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { | ||||
|  | ||||
|   // Find all the unused registers that can be used for the predicate stack. | ||||
|   for (TargetRegisterClass::iterator I = AMDGPU::SReg_64RegClass.begin(), | ||||
|                                      S = AMDGPU::SReg_64RegClass.end(); | ||||
|                                      I != S; ++I) { | ||||
|     unsigned Reg = *I; | ||||
|     if (!MF.getRegInfo().isPhysRegUsed(Reg)) { | ||||
|       UnusedRegisters.insert(UnusedRegisters.begin(), Reg); | ||||
|     } | ||||
|   } | ||||
|   bool HaveCf = false; | ||||
|  | ||||
|   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); | ||||
|                                                   BB != BB_E; ++BB) { | ||||
|     MachineBasicBlock &MBB = *BB; | ||||
|   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); | ||||
|        BI != BE; ++BI) { | ||||
|  | ||||
|     MachineBasicBlock &MBB = *BI; | ||||
|     for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); | ||||
|                                I != MBB.end(); I = Next) { | ||||
|          I != MBB.end(); I = Next) { | ||||
|  | ||||
|       Next = llvm::next(I); | ||||
|       MachineInstr &MI = *I; | ||||
|       unsigned Reg; | ||||
|       switch (MI.getOpcode()) { | ||||
|         default: break; | ||||
|         case AMDGPU::SI_IF_NZ: | ||||
|           Reg = allocReg(); | ||||
|           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_SAVEEXEC_B64), | ||||
|                   Reg) | ||||
|                   .addOperand(MI.getOperand(0)); // VCC | ||||
|           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64), | ||||
|                   Reg) | ||||
|                   .addReg(Reg) | ||||
|                   .addReg(AMDGPU::EXEC); | ||||
|           MI.eraseFromParent(); | ||||
|           PredicateStack.push_back(Reg); | ||||
|         case AMDGPU::SI_IF: | ||||
|           If(MI); | ||||
|           break; | ||||
|  | ||||
|         case AMDGPU::ELSE: | ||||
|           Reg = PredicateStack.back(); | ||||
|           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_SAVEEXEC_B64), | ||||
|                   Reg) | ||||
|                   .addReg(Reg); | ||||
|           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64), | ||||
|                   AMDGPU::EXEC) | ||||
|                   .addReg(Reg) | ||||
|                   .addReg(AMDGPU::EXEC); | ||||
|           MI.eraseFromParent(); | ||||
|         case AMDGPU::SI_ELSE: | ||||
|           Else(MI); | ||||
|           break; | ||||
|  | ||||
|         case AMDGPU::ENDIF: | ||||
|           Reg = PredicateStack.back(); | ||||
|           PredicateStack.pop_back(); | ||||
|           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64), | ||||
|                   AMDGPU::EXEC) | ||||
|                   .addReg(AMDGPU::EXEC) | ||||
|                   .addReg(Reg); | ||||
|           freeReg(Reg); | ||||
|         case AMDGPU::SI_BREAK: | ||||
|           Break(MI); | ||||
|           break; | ||||
|  | ||||
|           if (MF.getInfo<SIMachineFunctionInfo>()->ShaderType == ShaderType::PIXEL && | ||||
|               PredicateStack.empty()) { | ||||
|             // If the exec mask is non-zero, skip the next two instructions | ||||
|             BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ)) | ||||
|                     .addImm(3) | ||||
|                     .addReg(AMDGPU::EXEC); | ||||
|         case AMDGPU::SI_IF_BREAK: | ||||
|           IfBreak(MI); | ||||
|           break; | ||||
|  | ||||
|             // Exec mask is zero: Export to NULL target... | ||||
|             BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::EXP)) | ||||
|                     .addImm(0) | ||||
|                     .addImm(0x09) // V_008DFC_SQ_EXP_NULL | ||||
|                     .addImm(0) | ||||
|                     .addImm(1) | ||||
|                     .addImm(1) | ||||
|                     .addReg(AMDGPU::SREG_LIT_0) | ||||
|                     .addReg(AMDGPU::SREG_LIT_0) | ||||
|                     .addReg(AMDGPU::SREG_LIT_0) | ||||
|                     .addReg(AMDGPU::SREG_LIT_0); | ||||
|         case AMDGPU::SI_ELSE_BREAK: | ||||
|           ElseBreak(MI); | ||||
|           break; | ||||
|  | ||||
|             // ... and terminate wavefront | ||||
|             BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM)); | ||||
|           } | ||||
|           MI.eraseFromParent(); | ||||
|         case AMDGPU::SI_LOOP: | ||||
|           Loop(MI); | ||||
|           break; | ||||
|  | ||||
|         case AMDGPU::SI_END_CF: | ||||
|           HaveCf = true; | ||||
|           EndCf(MI); | ||||
|           break; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // TODO: What is this good for? | ||||
|   unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType; | ||||
|   if (HaveCf && ShaderType == ShaderType::PIXEL) { | ||||
|     for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); | ||||
|          BI != BE; ++BI) { | ||||
|  | ||||
|       MachineBasicBlock &MBB = *BI; | ||||
|       if (MBB.succ_empty()) { | ||||
|  | ||||
|         MachineInstr &MI = *MBB.getFirstNonPHI(); | ||||
|         DebugLoc DL = MI.getDebugLoc(); | ||||
|  | ||||
|         // If the exec mask is non-zero, skip the next two instructions | ||||
|         BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) | ||||
|                .addImm(3) | ||||
|                .addReg(AMDGPU::EXEC); | ||||
|  | ||||
|         // Exec mask is zero: Export to NULL target... | ||||
|         BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP)) | ||||
|                 .addImm(0) | ||||
|                 .addImm(0x09) // V_008DFC_SQ_EXP_NULL | ||||
|                 .addImm(0) | ||||
|                 .addImm(1) | ||||
|                 .addImm(1) | ||||
|                 .addReg(AMDGPU::SREG_LIT_0) | ||||
|                 .addReg(AMDGPU::SREG_LIT_0) | ||||
|                 .addReg(AMDGPU::SREG_LIT_0) | ||||
|                 .addReg(AMDGPU::SREG_LIT_0); | ||||
|  | ||||
|         // ... and terminate wavefront | ||||
|         BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM)); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| /// \brief Take the register at the back of UnusedRegisters and return it. | ||||
| /// | ||||
| /// Asserts that at least one unused register is left. | ||||
| unsigned SILowerControlFlowPass::allocReg() { | ||||
|   assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack"); | ||||
|  | ||||
|   const unsigned Taken = UnusedRegisters.back(); | ||||
|   UnusedRegisters.pop_back(); | ||||
|   return Taken; | ||||
| } | ||||
|  | ||||
| /// \brief Hand \p Reg back by appending it to UnusedRegisters, so that it is | ||||
| /// the next register returned by allocReg(). | ||||
| void SILowerControlFlowPass::freeReg(unsigned Reg) { | ||||
|   UnusedRegisters.insert(UnusedRegisters.end(), Reg); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user