mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-10-25 10:27:04 +00:00
R600: New control flow for SI v2
This patch replaces the control flow handling with a new pass which structurizes the graph before transforming it to machine instructions. This has a couple of different advantages and currently fixes 20 piglit tests without a single regression. It is now a general purpose transformation that could be used not only for SI/R6xx, but also for other hardware implementations that use a form of structurized control flow. v2: further cleanup, fixes and documentation Patch by: Christian König Signed-off-by: Christian König <deathsimple@vodafone.de> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> Tested-by: Michel Dänzer <michel.daenzer@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170591 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -25,13 +25,14 @@ FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
|
||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
||||
|
||||
// SI Passes
|
||||
FunctionPass *createSIAnnotateControlFlowPass();
|
||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
|
||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
|
||||
FunctionPass *createSIFixSGPRLivenessPass(TargetMachine &tm);
|
||||
|
||||
// Passes common to R600 and SI
|
||||
Pass *createAMDGPUStructurizeCFGPass();
|
||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
732
lib/Target/R600/AMDGPUStructurizeCFG.cpp
Normal file
732
lib/Target/R600/AMDGPUStructurizeCFG.cpp
Normal file
@@ -0,0 +1,732 @@
|
||||
//===-- AMDGPUStructurizeCFG.cpp - ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// The pass implemented in this file transforms the program's control flow
|
||||
/// graph into a form that's suitable for code generation on hardware that
|
||||
/// implements control flow by execution masking. This currently includes all
|
||||
/// AMD GPUs but may as well be useful for other types of hardware.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/Analysis/RegionIterator.h"
|
||||
#include "llvm/Analysis/RegionInfo.h"
|
||||
#include "llvm/Analysis/RegionPass.h"
|
||||
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
// Definition of the complex types used in this pass.
|
||||
|
||||
typedef std::pair<BasicBlock *, Value *> BBValuePair;
|
||||
typedef ArrayRef<BasicBlock*> BBVecRef;
|
||||
|
||||
typedef SmallVector<RegionNode*, 8> RNVector;
|
||||
typedef SmallVector<BasicBlock*, 8> BBVector;
|
||||
typedef SmallVector<BBValuePair, 2> BBValueVector;
|
||||
|
||||
typedef DenseMap<PHINode *, BBValueVector> PhiMap;
|
||||
typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
|
||||
typedef DenseMap<BasicBlock *, Value *> BBPredicates;
|
||||
typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
|
||||
typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
|
||||
|
||||
// The name for newly created blocks.
|
||||
|
||||
static const char *FlowBlockName = "Flow";
|
||||
|
||||
/// @brief Transforms the control flow graph on one single entry/exit region
|
||||
/// at a time.
|
||||
///
|
||||
/// After the transform all "If"/"Then"/"Else" style control flow looks like
|
||||
/// this:
|
||||
///
|
||||
/// \verbatim
|
||||
/// 1
|
||||
/// ||
|
||||
/// | |
|
||||
/// 2 |
|
||||
/// | /
|
||||
/// |/
|
||||
/// 3
|
||||
/// || Where:
|
||||
/// | | 1 = "If" block, calculates the condition
|
||||
/// 4 | 2 = "Then" subregion, runs if the condition is true
|
||||
/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
|
||||
/// |/ 4 = "Else" optional subregion, runs if the condition is false
|
||||
/// 5 5 = "End" block, also rejoins the control flow
|
||||
/// \endverbatim
|
||||
///
|
||||
/// Control flow is expressed as a branch where the true exit goes into the
|
||||
/// "Then"/"Else" region, while the false exit skips the region
|
||||
/// The condition for the optional "Else" region is expressed as a PHI node.
|
||||
/// The incomming values of the PHI node are true for the "If" edge and false
|
||||
/// for the "Then" edge.
|
||||
///
|
||||
/// Additionally to that even complicated loops look like this:
|
||||
///
|
||||
/// \verbatim
|
||||
/// 1
|
||||
/// ||
|
||||
/// | |
|
||||
/// 2 ^ Where:
|
||||
/// | / 1 = "Entry" block
|
||||
/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
|
||||
/// 3 3 = "Flow" block, with back edge to entry block
|
||||
/// |
|
||||
/// \endverbatim
|
||||
///
|
||||
/// The back edge of the "Flow" block is always on the false side of the branch
|
||||
/// while the true side continues the general flow. So the loop condition
|
||||
/// consist of a network of PHI nodes where the true incoming values expresses
|
||||
/// breaks and the false values expresses continue states.
|
||||
class AMDGPUStructurizeCFG : public RegionPass {
|
||||
|
||||
static char ID;
|
||||
|
||||
Type *Boolean;
|
||||
ConstantInt *BoolTrue;
|
||||
ConstantInt *BoolFalse;
|
||||
UndefValue *BoolUndef;
|
||||
|
||||
Function *Func;
|
||||
Region *ParentRegion;
|
||||
|
||||
DominatorTree *DT;
|
||||
|
||||
RNVector Order;
|
||||
VisitedMap Visited;
|
||||
PredMap Predicates;
|
||||
BBPhiMap DeletedPhis;
|
||||
BBVector FlowsInserted;
|
||||
|
||||
BasicBlock *LoopStart;
|
||||
BasicBlock *LoopEnd;
|
||||
BBPredicates LoopPred;
|
||||
|
||||
void orderNodes();
|
||||
|
||||
void buildPredicate(BranchInst *Term, unsigned Idx,
|
||||
BBPredicates &Pred, bool Invert);
|
||||
|
||||
void analyzeBlock(BasicBlock *BB);
|
||||
|
||||
void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
|
||||
|
||||
void collectInfos();
|
||||
|
||||
bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
|
||||
|
||||
void killTerminator(BasicBlock *BB);
|
||||
|
||||
RegionNode *skipChained(RegionNode *Node);
|
||||
|
||||
void delPhiValues(BasicBlock *From, BasicBlock *To);
|
||||
|
||||
void addPhiValues(BasicBlock *From, BasicBlock *To);
|
||||
|
||||
BasicBlock *getNextFlow(BasicBlock *Prev);
|
||||
|
||||
bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
|
||||
|
||||
BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
|
||||
|
||||
void createFlow();
|
||||
|
||||
void insertConditions();
|
||||
|
||||
void rebuildSSA();
|
||||
|
||||
public:
|
||||
AMDGPUStructurizeCFG():
|
||||
RegionPass(ID) {
|
||||
|
||||
initializeRegionInfoPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
virtual bool doInitialization(Region *R, RGPassManager &RGM);
|
||||
|
||||
virtual bool runOnRegion(Region *R, RGPassManager &RGM);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "AMDGPU simplify control flow";
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
|
||||
AU.addRequired<DominatorTree>();
|
||||
AU.addPreserved<DominatorTree>();
|
||||
RegionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char AMDGPUStructurizeCFG::ID = 0;
|
||||
|
||||
/// \brief Initialize the types and constants used in the pass
|
||||
bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
|
||||
|
||||
LLVMContext &Context = R->getEntry()->getContext();
|
||||
|
||||
Boolean = Type::getInt1Ty(Context);
|
||||
BoolTrue = ConstantInt::getTrue(Context);
|
||||
BoolFalse = ConstantInt::getFalse(Context);
|
||||
BoolUndef = UndefValue::get(Boolean);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Build up the general order of nodes
|
||||
void AMDGPUStructurizeCFG::orderNodes() {
|
||||
|
||||
scc_iterator<Region *> I = scc_begin(ParentRegion),
|
||||
E = scc_end(ParentRegion);
|
||||
for (Order.clear(); I != E; ++I) {
|
||||
std::vector<RegionNode *> &Nodes = *I;
|
||||
Order.append(Nodes.begin(), Nodes.end());
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Build blocks and loop predicates
|
||||
void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
|
||||
BBPredicates &Pred, bool Invert) {
|
||||
|
||||
Value *True = Invert ? BoolFalse : BoolTrue;
|
||||
Value *False = Invert ? BoolTrue : BoolFalse;
|
||||
|
||||
RegionInfo *RI = ParentRegion->getRegionInfo();
|
||||
BasicBlock *BB = Term->getParent();
|
||||
|
||||
// Handle the case where multiple regions start at the same block
|
||||
Region *R = BB != ParentRegion->getEntry() ?
|
||||
RI->getRegionFor(BB) : ParentRegion;
|
||||
|
||||
if (R == ParentRegion) {
|
||||
// It's a top level block in our region
|
||||
Value *Cond = True;
|
||||
if (Term->isConditional()) {
|
||||
BasicBlock *Other = Term->getSuccessor(!Idx);
|
||||
|
||||
if (Visited.count(Other)) {
|
||||
if (!Pred.count(Other))
|
||||
Pred[Other] = False;
|
||||
|
||||
if (!Pred.count(BB))
|
||||
Pred[BB] = True;
|
||||
return;
|
||||
}
|
||||
Cond = Term->getCondition();
|
||||
|
||||
if (Idx != Invert)
|
||||
Cond = BinaryOperator::CreateNot(Cond, "", Term);
|
||||
}
|
||||
|
||||
Pred[BB] = Cond;
|
||||
|
||||
} else if (ParentRegion->contains(R)) {
|
||||
// It's a block in a sub region
|
||||
while(R->getParent() != ParentRegion)
|
||||
R = R->getParent();
|
||||
|
||||
Pred[R->getEntry()] = True;
|
||||
|
||||
} else {
|
||||
// It's a branch from outside into our parent region
|
||||
Pred[BB] = True;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Analyze the successors of each block and build up predicates
|
||||
void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
|
||||
|
||||
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
|
||||
BBPredicates &Pred = Predicates[BB];
|
||||
|
||||
for (; PI != PE; ++PI) {
|
||||
BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
|
||||
|
||||
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
|
||||
BasicBlock *Succ = Term->getSuccessor(i);
|
||||
if (Succ != BB)
|
||||
continue;
|
||||
buildPredicate(Term, i, Pred, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Analyze the conditions leading to loop to a previous block
|
||||
void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
|
||||
|
||||
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
|
||||
|
||||
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
|
||||
BasicBlock *Succ = Term->getSuccessor(i);
|
||||
|
||||
// Ignore it if it's not a back edge
|
||||
if (!Visited.count(Succ))
|
||||
continue;
|
||||
|
||||
buildPredicate(Term, i, LoopPred, true);
|
||||
|
||||
LoopEnd = BB;
|
||||
if (Visited[Succ] < LoopIdx) {
|
||||
LoopIdx = Visited[Succ];
|
||||
LoopStart = Succ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Collect various loop and predicate infos
|
||||
void AMDGPUStructurizeCFG::collectInfos() {
|
||||
|
||||
unsigned Number = 0, LoopIdx = ~0;
|
||||
|
||||
// Reset predicate
|
||||
Predicates.clear();
|
||||
|
||||
// and loop infos
|
||||
LoopStart = LoopEnd = 0;
|
||||
LoopPred.clear();
|
||||
|
||||
RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
|
||||
for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
|
||||
|
||||
// Analyze all the conditions leading to a node
|
||||
analyzeBlock((*OI)->getEntry());
|
||||
|
||||
if ((*OI)->isSubRegion())
|
||||
continue;
|
||||
|
||||
// Find the first/last loop nodes and loop predicates
|
||||
analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Does A dominate all the predicates of B ?
|
||||
bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
|
||||
|
||||
BBPredicates &Preds = Predicates[B];
|
||||
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
|
||||
PI != PE; ++PI) {
|
||||
|
||||
if (!DT->dominates(A, PI->first))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Remove phi values from all successors and the remove the terminator.
|
||||
void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
|
||||
|
||||
TerminatorInst *Term = BB->getTerminator();
|
||||
if (!Term)
|
||||
return;
|
||||
|
||||
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
|
||||
SI != SE; ++SI) {
|
||||
|
||||
delPhiValues(BB, *SI);
|
||||
}
|
||||
|
||||
Term->eraseFromParent();
|
||||
}
|
||||
|
||||
/// First: Skip forward to the first region node that either isn't a subregion or not
|
||||
/// dominating it's exit, remove all the skipped nodes from the node order.
|
||||
///
|
||||
/// Second: Handle the first successor directly if the resulting nodes successor
|
||||
/// predicates are still dominated by the original entry
|
||||
RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
|
||||
|
||||
BasicBlock *Entry = Node->getEntry();
|
||||
|
||||
// Skip forward as long as it is just a linear flow
|
||||
while (true) {
|
||||
BasicBlock *Entry = Node->getEntry();
|
||||
BasicBlock *Exit;
|
||||
|
||||
if (Node->isSubRegion()) {
|
||||
Exit = Node->getNodeAs<Region>()->getExit();
|
||||
} else {
|
||||
TerminatorInst *Term = Entry->getTerminator();
|
||||
if (Term->getNumSuccessors() != 1)
|
||||
break;
|
||||
Exit = Term->getSuccessor(0);
|
||||
}
|
||||
|
||||
// It's a back edge, break here so we can insert a loop node
|
||||
if (!Visited.count(Exit))
|
||||
return Node;
|
||||
|
||||
// More than node edges are pointing to exit
|
||||
if (!DT->dominates(Entry, Exit))
|
||||
return Node;
|
||||
|
||||
RegionNode *Next = ParentRegion->getNode(Exit);
|
||||
RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
|
||||
assert(I != Order.end());
|
||||
|
||||
Visited.erase(Next->getEntry());
|
||||
Order.erase(I);
|
||||
Node = Next;
|
||||
}
|
||||
|
||||
BasicBlock *BB = Node->getEntry();
|
||||
TerminatorInst *Term = BB->getTerminator();
|
||||
if (Term->getNumSuccessors() != 2)
|
||||
return Node;
|
||||
|
||||
// Our node has exactly two succesors, check if we can handle
|
||||
// any of them directly
|
||||
BasicBlock *Succ = Term->getSuccessor(0);
|
||||
if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
|
||||
Succ = Term->getSuccessor(1);
|
||||
if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
|
||||
return Node;
|
||||
} else {
|
||||
BasicBlock *Succ2 = Term->getSuccessor(1);
|
||||
if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
|
||||
dominatesPredicates(Entry, Succ2))
|
||||
Succ = Succ2;
|
||||
}
|
||||
|
||||
RegionNode *Next = ParentRegion->getNode(Succ);
|
||||
RNVector::iterator E = Order.end();
|
||||
RNVector::iterator I = std::find(Order.begin(), E, Next);
|
||||
assert(I != E);
|
||||
|
||||
killTerminator(BB);
|
||||
FlowsInserted.push_back(BB);
|
||||
Visited.erase(Succ);
|
||||
Order.erase(I);
|
||||
return ParentRegion->getNode(wireFlowBlock(BB, Next));
|
||||
}
|
||||
|
||||
/// \brief Remove all PHI values coming from "From" into "To" and remember
|
||||
/// them in DeletedPhis
|
||||
void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
|
||||
|
||||
PhiMap &Map = DeletedPhis[To];
|
||||
for (BasicBlock::iterator I = To->begin(), E = To->end();
|
||||
I != E && isa<PHINode>(*I);) {
|
||||
|
||||
PHINode &Phi = cast<PHINode>(*I++);
|
||||
while (Phi.getBasicBlockIndex(From) != -1) {
|
||||
Value *Deleted = Phi.removeIncomingValue(From, false);
|
||||
Map[&Phi].push_back(std::make_pair(From, Deleted));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Add the PHI values back once we knew the new predecessor
|
||||
void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
|
||||
|
||||
if (!DeletedPhis.count(To))
|
||||
return;
|
||||
|
||||
PhiMap &Map = DeletedPhis[To];
|
||||
SSAUpdater Updater;
|
||||
|
||||
for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
|
||||
|
||||
PHINode *Phi = I->first;
|
||||
Updater.Initialize(Phi->getType(), "");
|
||||
BasicBlock *Fallback = To;
|
||||
bool HaveFallback = false;
|
||||
|
||||
for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
|
||||
VI != VE; ++VI) {
|
||||
|
||||
Updater.AddAvailableValue(VI->first, VI->second);
|
||||
BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
|
||||
if (Dom == VI->first)
|
||||
HaveFallback = true;
|
||||
else if (Dom != Fallback)
|
||||
HaveFallback = false;
|
||||
Fallback = Dom;
|
||||
}
|
||||
if (!HaveFallback) {
|
||||
Value *Undef = UndefValue::get(Phi->getType());
|
||||
Updater.AddAvailableValue(Fallback, Undef);
|
||||
}
|
||||
|
||||
Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
|
||||
}
|
||||
DeletedPhis.erase(To);
|
||||
}
|
||||
|
||||
/// \brief Create a new flow node and update dominator tree and region info
|
||||
BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
|
||||
|
||||
LLVMContext &Context = Func->getContext();
|
||||
BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
|
||||
Order.back()->getEntry();
|
||||
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
|
||||
Func, Insert);
|
||||
DT->addNewBlock(Flow, Prev);
|
||||
ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
|
||||
FlowsInserted.push_back(Flow);
|
||||
return Flow;
|
||||
}
|
||||
|
||||
/// \brief Can we predict that this node will always be called?
|
||||
bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
|
||||
BasicBlock *Node) {
|
||||
|
||||
BBPredicates &Preds = Predicates[Node];
|
||||
bool Dominated = false;
|
||||
|
||||
for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
|
||||
I != E; ++I) {
|
||||
|
||||
if (I->second != BoolTrue)
|
||||
return false;
|
||||
|
||||
if (!Dominated && DT->dominates(I->first, Prev))
|
||||
Dominated = true;
|
||||
}
|
||||
return Dominated;
|
||||
}
|
||||
|
||||
/// \brief Wire up the new control flow by inserting or updating the branch
|
||||
/// instructions at node exits
|
||||
BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
|
||||
RegionNode *Node) {
|
||||
|
||||
BasicBlock *Entry = Node->getEntry();
|
||||
|
||||
if (LoopStart == Entry) {
|
||||
LoopStart = Prev;
|
||||
LoopPred[Prev] = BoolTrue;
|
||||
}
|
||||
|
||||
// Wire it up temporary, skipChained may recurse into us
|
||||
BranchInst::Create(Entry, Prev);
|
||||
DT->changeImmediateDominator(Entry, Prev);
|
||||
addPhiValues(Prev, Entry);
|
||||
|
||||
Node = skipChained(Node);
|
||||
|
||||
BasicBlock *Next = getNextFlow(Prev);
|
||||
if (!isPredictableTrue(Prev, Entry)) {
|
||||
// Let Prev point to entry and next block
|
||||
Prev->getTerminator()->eraseFromParent();
|
||||
BranchInst::Create(Entry, Next, BoolUndef, Prev);
|
||||
} else {
|
||||
DT->changeImmediateDominator(Next, Entry);
|
||||
}
|
||||
|
||||
// Let node exit(s) point to next block
|
||||
if (Node->isSubRegion()) {
|
||||
Region *SubRegion = Node->getNodeAs<Region>();
|
||||
BasicBlock *Exit = SubRegion->getExit();
|
||||
|
||||
// Find all the edges from the sub region to the exit
|
||||
BBVector ToDo;
|
||||
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
|
||||
if (SubRegion->contains(*I))
|
||||
ToDo.push_back(*I);
|
||||
}
|
||||
|
||||
// Modify the edges to point to the new flow block
|
||||
for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
|
||||
delPhiValues(*I, Exit);
|
||||
TerminatorInst *Term = (*I)->getTerminator();
|
||||
Term->replaceUsesOfWith(Exit, Next);
|
||||
}
|
||||
|
||||
// Update the region info
|
||||
SubRegion->replaceExit(Next);
|
||||
|
||||
} else {
|
||||
BasicBlock *BB = Node->getNodeAs<BasicBlock>();
|
||||
killTerminator(BB);
|
||||
BranchInst::Create(Next, BB);
|
||||
|
||||
if (BB == LoopEnd)
|
||||
LoopEnd = 0;
|
||||
}
|
||||
|
||||
return Next;
|
||||
}
|
||||
|
||||
/// Destroy node order and visited map, build up flow order instead.
|
||||
/// After this function control flow looks like it should be, but
|
||||
/// branches only have undefined conditions.
|
||||
void AMDGPUStructurizeCFG::createFlow() {
|
||||
|
||||
DeletedPhis.clear();
|
||||
|
||||
BasicBlock *Prev = Order.pop_back_val()->getEntry();
|
||||
assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
|
||||
Visited.erase(Prev);
|
||||
|
||||
if (LoopStart == Prev) {
|
||||
// Loop starts at entry, split entry so that we can predicate it
|
||||
BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
|
||||
BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
|
||||
DT->addNewBlock(Split, Prev);
|
||||
ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
|
||||
Predicates[Split] = Predicates[Prev];
|
||||
Order.push_back(ParentRegion->getBBNode(Split));
|
||||
LoopPred[Prev] = BoolTrue;
|
||||
|
||||
} else if (LoopStart == Order.back()->getEntry()) {
|
||||
// Loop starts behind entry, split entry so that we can jump to it
|
||||
Instruction *Term = Prev->getTerminator();
|
||||
BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
|
||||
DT->addNewBlock(Split, Prev);
|
||||
ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
|
||||
Prev = Split;
|
||||
}
|
||||
|
||||
killTerminator(Prev);
|
||||
FlowsInserted.clear();
|
||||
FlowsInserted.push_back(Prev);
|
||||
|
||||
while (!Order.empty()) {
|
||||
RegionNode *Node = Order.pop_back_val();
|
||||
Visited.erase(Node->getEntry());
|
||||
Prev = wireFlowBlock(Prev, Node);
|
||||
if (LoopStart && !LoopEnd) {
|
||||
// Create an extra loop end node
|
||||
LoopEnd = Prev;
|
||||
Prev = getNextFlow(LoopEnd);
|
||||
BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
|
||||
addPhiValues(LoopEnd, LoopStart);
|
||||
}
|
||||
}
|
||||
|
||||
BasicBlock *Exit = ParentRegion->getExit();
|
||||
BranchInst::Create(Exit, Prev);
|
||||
addPhiValues(Prev, Exit);
|
||||
if (DT->dominates(ParentRegion->getEntry(), Exit))
|
||||
DT->changeImmediateDominator(Exit, Prev);
|
||||
|
||||
if (LoopStart && LoopEnd) {
|
||||
BBVector::iterator FI = std::find(FlowsInserted.begin(),
|
||||
FlowsInserted.end(),
|
||||
LoopStart);
|
||||
for (; *FI != LoopEnd; ++FI) {
|
||||
addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
|
||||
}
|
||||
}
|
||||
|
||||
assert(Order.empty());
|
||||
assert(Visited.empty());
|
||||
assert(DeletedPhis.empty());
|
||||
}
|
||||
|
||||
/// \brief Insert the missing branch conditions
|
||||
void AMDGPUStructurizeCFG::insertConditions() {
|
||||
|
||||
SSAUpdater PhiInserter;
|
||||
|
||||
for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
|
||||
FI != FE; ++FI) {
|
||||
|
||||
BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
|
||||
if (Term->isUnconditional())
|
||||
continue;
|
||||
|
||||
PhiInserter.Initialize(Boolean, "");
|
||||
PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
|
||||
|
||||
BasicBlock *Succ = Term->getSuccessor(0);
|
||||
BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
|
||||
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
|
||||
PI != PE; ++PI) {
|
||||
|
||||
PhiInserter.AddAvailableValue(PI->first, PI->second);
|
||||
}
|
||||
|
||||
Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a rare case where the disintegrated nodes instructions
|
||||
/// no longer dominate all their uses. Not sure if this is really nessasary
|
||||
void AMDGPUStructurizeCFG::rebuildSSA() {
|
||||
|
||||
SSAUpdater Updater;
|
||||
for (Region::block_iterator I = ParentRegion->block_begin(),
|
||||
E = ParentRegion->block_end();
|
||||
I != E; ++I) {
|
||||
|
||||
BasicBlock *BB = *I;
|
||||
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
|
||||
II != IE; ++II) {
|
||||
|
||||
bool Initialized = false;
|
||||
for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
|
||||
|
||||
Next = I->getNext();
|
||||
|
||||
Instruction *User = cast<Instruction>(I->getUser());
|
||||
if (User->getParent() == BB) {
|
||||
continue;
|
||||
|
||||
} else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
|
||||
if (UserPN->getIncomingBlock(*I) == BB)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (DT->dominates(II, User))
|
||||
continue;
|
||||
|
||||
if (!Initialized) {
|
||||
Value *Undef = UndefValue::get(II->getType());
|
||||
Updater.Initialize(II->getType(), "");
|
||||
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
|
||||
Updater.AddAvailableValue(BB, II);
|
||||
Initialized = true;
|
||||
}
|
||||
Updater.RewriteUseAfterInsertions(*I);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Run the transformation for each region found
|
||||
bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
|
||||
|
||||
if (R->isTopLevelRegion())
|
||||
return false;
|
||||
|
||||
Func = R->getEntry()->getParent();
|
||||
ParentRegion = R;
|
||||
|
||||
DT = &getAnalysis<DominatorTree>();
|
||||
|
||||
orderNodes();
|
||||
collectInfos();
|
||||
createFlow();
|
||||
insertConditions();
|
||||
rebuildSSA();
|
||||
|
||||
Order.clear();
|
||||
Visited.clear();
|
||||
Predicates.clear();
|
||||
DeletedPhis.clear();
|
||||
FlowsInserted.clear();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Create the pass
|
||||
Pass *llvm::createAMDGPUStructurizeCFGPass() {
|
||||
return new AMDGPUStructurizeCFG();
|
||||
}
|
||||
@@ -91,6 +91,11 @@ TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
|
||||
bool
|
||||
AMDGPUPassConfig::addPreISel() {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
addPass(createAMDGPUStructurizeCFGPass());
|
||||
addPass(createSIAnnotateControlFlowPass());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -107,9 +112,6 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
|
||||
addPass(createSIAssignInterpRegsPass(*TM));
|
||||
}
|
||||
addPass(createAMDGPUConvertToISAPass(*TM));
|
||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
addPass(createSIFixSGPRLivenessPass(*TM));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -124,11 +126,10 @@ bool AMDGPUPassConfig::addPreSched2() {
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPreEmitPass() {
|
||||
addPass(createAMDGPUCFGPreparationPass(*TM));
|
||||
addPass(createAMDGPUCFGStructurizerPass(*TM));
|
||||
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
addPass(createAMDGPUCFGPreparationPass(*TM));
|
||||
addPass(createAMDGPUCFGStructurizerPass(*TM));
|
||||
addPass(createR600ExpandSpecialInstrsPass(*TM));
|
||||
addPass(&FinalizeMachineBundlesID);
|
||||
} else {
|
||||
|
||||
@@ -2596,7 +2596,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
|
||||
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
|
||||
case AMDGPU::BRANCH_COND_i32:
|
||||
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
|
||||
case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
|
||||
default:
|
||||
assert(0 && "internal error");
|
||||
}
|
||||
@@ -2608,7 +2607,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
|
||||
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
|
||||
case AMDGPU::BRANCH_COND_i32:
|
||||
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
|
||||
case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
|
||||
default:
|
||||
assert(0 && "internal error");
|
||||
}
|
||||
@@ -2658,8 +2656,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
|
||||
return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
|
||||
case AMDGPU::BRANCH_COND_i32:
|
||||
case AMDGPU::BRANCH_COND_f32:
|
||||
case AMDGPU::SI_IF_NZ:
|
||||
case AMDGPU::SI_IF_Z:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
|
||||
@@ -206,68 +206,3 @@ multiclass BranchInstr2<string name> {
|
||||
// Intrinsics support
|
||||
//===--------------------------------------------------------------------===//
|
||||
include "AMDILIntrinsics.td"
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instructions support
|
||||
//===--------------------------------------------------------------------===//
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Custom Inserter for Branches and returns, this eventually will be a
|
||||
// separate pass
|
||||
//===---------------------------------------------------------------------===//
|
||||
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
|
||||
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
|
||||
"; Pseudo unconditional branch instruction",
|
||||
[(br bb:$target)]>;
|
||||
defm BRANCH_COND : BranchConditional<IL_brcond>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Flow and Program control Instructions
|
||||
//===---------------------------------------------------------------------===//
|
||||
let isTerminator=1 in {
|
||||
def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
|
||||
!strconcat("SWITCH", " $src"), []>;
|
||||
def CASE : ILFormat< (outs), (ins GPRI32:$src),
|
||||
!strconcat("CASE", " $src"), []>;
|
||||
def BREAK : ILFormat< (outs), (ins),
|
||||
"BREAK", []>;
|
||||
def CONTINUE : ILFormat< (outs), (ins),
|
||||
"CONTINUE", []>;
|
||||
def DEFAULT : ILFormat< (outs), (ins),
|
||||
"DEFAULT", []>;
|
||||
def ELSE : ILFormat< (outs), (ins),
|
||||
"ELSE", []>;
|
||||
def ENDSWITCH : ILFormat< (outs), (ins),
|
||||
"ENDSWITCH", []>;
|
||||
def ENDMAIN : ILFormat< (outs), (ins),
|
||||
"ENDMAIN", []>;
|
||||
def END : ILFormat< (outs), (ins),
|
||||
"END", []>;
|
||||
def ENDFUNC : ILFormat< (outs), (ins),
|
||||
"ENDFUNC", []>;
|
||||
def ENDIF : ILFormat< (outs), (ins),
|
||||
"ENDIF", []>;
|
||||
def WHILELOOP : ILFormat< (outs), (ins),
|
||||
"WHILE", []>;
|
||||
def ENDLOOP : ILFormat< (outs), (ins),
|
||||
"ENDLOOP", []>;
|
||||
def FUNC : ILFormat< (outs), (ins),
|
||||
"FUNC", []>;
|
||||
def RETDYN : ILFormat< (outs), (ins),
|
||||
"RET_DYN", []>;
|
||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||
defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
|
||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||
defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
|
||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||
defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
|
||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||
defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
|
||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||
defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
|
||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||
defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
|
||||
defm IFC : BranchInstr2<"IFC">;
|
||||
defm BREAKC : BranchInstr2<"BREAKC">;
|
||||
defm CONTINUEC : BranchInstr2<"CONTINUEC">;
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen
|
||||
AMDGPUAsmPrinter.cpp
|
||||
AMDGPUMCInstLower.cpp
|
||||
AMDGPUSubtarget.cpp
|
||||
AMDGPUStructurizeCFG.cpp
|
||||
AMDGPUTargetMachine.cpp
|
||||
AMDGPUISelLowering.cpp
|
||||
AMDGPUConvertToISA.cpp
|
||||
@@ -37,6 +38,7 @@ add_llvm_target(R600CodeGen
|
||||
R600ISelLowering.cpp
|
||||
R600MachineFunctionInfo.cpp
|
||||
R600RegisterInfo.cpp
|
||||
SIAnnotateControlFlow.cpp
|
||||
SIAssignInterpRegs.cpp
|
||||
SIInstrInfo.cpp
|
||||
SIISelLowering.cpp
|
||||
@@ -44,7 +46,6 @@ add_llvm_target(R600CodeGen
|
||||
SILowerControlFlow.cpp
|
||||
SIMachineFunctionInfo.cpp
|
||||
SIRegisterInfo.cpp
|
||||
SIFixSGPRLiveness.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMR600CodeGen intrinsics_gen)
|
||||
|
||||
@@ -1545,6 +1545,71 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
|
||||
"RETURN", [(IL_retflag)]>;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instructions support
|
||||
//===--------------------------------------------------------------------===//
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Custom Inserter for Branches and returns, this eventually will be a
|
||||
// separate pass
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Pseudo branch instructions. All of them are terminators that go through
// the custom inserter.
let isBranch = 1, isTerminator = 1, isBarrier = 1, usesCustomInserter = 1 in {

  // Unconditional branch, selected from the generic `br` node.
  def BRANCH : ILFormat<
    (outs), (ins brtarget:$target),
    "; Pseudo unconditional branch instruction",
    [(br bb:$target)]
  >;

  // Conditional branches, expanded by the BranchConditional multiclass.
  defm BRANCH_COND : BranchConditional<IL_brcond>;
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Flow and Program control Instructions
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Structured flow control opcodes. None of them carries a selection pattern;
// every record in this block is marked as a terminator.
let isTerminator = 1 in {

  // Opcodes taking a single 32-bit register operand.
  def SWITCH    : ILFormat<(outs), (ins GPRI32:$src), "SWITCH $src", []>;
  def CASE      : ILFormat<(outs), (ins GPRI32:$src), "CASE $src", []>;

  // Operand-less opcodes.
  def BREAK     : ILFormat<(outs), (ins), "BREAK", []>;
  def CONTINUE  : ILFormat<(outs), (ins), "CONTINUE", []>;
  def DEFAULT   : ILFormat<(outs), (ins), "DEFAULT", []>;
  def ELSE      : ILFormat<(outs), (ins), "ELSE", []>;
  def ENDSWITCH : ILFormat<(outs), (ins), "ENDSWITCH", []>;
  def ENDMAIN   : ILFormat<(outs), (ins), "ENDMAIN", []>;
  def END       : ILFormat<(outs), (ins), "END", []>;
  def ENDFUNC   : ILFormat<(outs), (ins), "ENDFUNC", []>;
  def ENDIF     : ILFormat<(outs), (ins), "ENDIF", []>;
  def WHILELOOP : ILFormat<(outs), (ins), "WHILE", []>;
  def ENDLOOP   : ILFormat<(outs), (ins), "ENDLOOP", []>;
  def FUNC      : ILFormat<(outs), (ins), "FUNC", []>;
  def RETDYN    : ILFormat<(outs), (ins), "RET_DYN", []>;

  // Each of the following opcodes has a custom swizzle pattern encoded in
  // the Swizzle Encoder.
  defm IF_LOGICALNZ       : BranchInstr<"IF_LOGICALNZ">;
  defm IF_LOGICALZ        : BranchInstr<"IF_LOGICALZ">;
  defm BREAK_LOGICALNZ    : BranchInstr<"BREAK_LOGICALNZ">;
  defm BREAK_LOGICALZ     : BranchInstr<"BREAK_LOGICALZ">;
  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
  defm CONTINUE_LOGICALZ  : BranchInstr<"CONTINUE_LOGICALZ">;

  // Opcodes expanded by the BranchInstr2 multiclass.
  defm IFC       : BranchInstr2<"IFC">;
  defm BREAKC    : BranchInstr2<"BREAKC">;
  defm CONTINUEC : BranchInstr2<"CONTINUEC">;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ISel Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
337
lib/Target/R600/SIAnnotateControlFlow.cpp
Normal file
337
lib/Target/R600/SIAnnotateControlFlow.cpp
Normal file
@@ -0,0 +1,337 @@
|
||||
//===-- SIAnnotateControlFlow.cpp - ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// Annotates the control flow with hardware specific intrinsics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/ADT/DepthFirstIterator.h"
|
||||
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
// Complex types used in this pass
|
||||
typedef std::pair<BasicBlock *, Value *> StackEntry;
|
||||
typedef SmallVector<StackEntry, 16> StackVector;
|
||||
|
||||
// Intrinsic names the control flow is annotated with
|
||||
static const char *IfIntrinsic = "llvm.SI.if";
|
||||
static const char *ElseIntrinsic = "llvm.SI.else";
|
||||
static const char *BreakIntrinsic = "llvm.SI.break";
|
||||
static const char *IfBreakIntrinsic = "llvm.SI.if.break";
|
||||
static const char *ElseBreakIntrinsic = "llvm.SI.else.break";
|
||||
static const char *LoopIntrinsic = "llvm.SI.loop";
|
||||
static const char *EndCfIntrinsic = "llvm.SI.end.cf";
|
||||
|
||||
class SIAnnotateControlFlow : public FunctionPass {
|
||||
|
||||
static char ID;
|
||||
|
||||
Type *Boolean;
|
||||
Type *Void;
|
||||
Type *Int64;
|
||||
Type *ReturnStruct;
|
||||
|
||||
ConstantInt *BoolTrue;
|
||||
ConstantInt *BoolFalse;
|
||||
UndefValue *BoolUndef;
|
||||
Constant *Int64Zero;
|
||||
|
||||
Constant *If;
|
||||
Constant *Else;
|
||||
Constant *Break;
|
||||
Constant *IfBreak;
|
||||
Constant *ElseBreak;
|
||||
Constant *Loop;
|
||||
Constant *EndCf;
|
||||
|
||||
DominatorTree *DT;
|
||||
StackVector Stack;
|
||||
SSAUpdater PhiInserter;
|
||||
|
||||
bool isTopOfStack(BasicBlock *BB);
|
||||
|
||||
Value *popSaved();
|
||||
|
||||
void push(BasicBlock *BB, Value *Saved);
|
||||
|
||||
bool isElse(PHINode *Phi);
|
||||
|
||||
void eraseIfUnused(PHINode *Phi);
|
||||
|
||||
void openIf(BranchInst *Term);
|
||||
|
||||
void insertElse(BranchInst *Term);
|
||||
|
||||
void handleLoopCondition(Value *Cond);
|
||||
|
||||
void handleLoop(BranchInst *Term);
|
||||
|
||||
void closeControlFlow(BasicBlock *BB);
|
||||
|
||||
public:
|
||||
SIAnnotateControlFlow():
|
||||
FunctionPass(ID) { }
|
||||
|
||||
virtual bool doInitialization(Module &M);
|
||||
|
||||
virtual bool runOnFunction(Function &F);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "SI annotate control flow";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
|
||||
AU.addRequired<DominatorTree>();
|
||||
AU.addPreserved<DominatorTree>();
|
||||
FunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char SIAnnotateControlFlow::ID = 0;
|
||||
|
||||
/// \brief Initialize all the types and constants used in the pass
|
||||
bool SIAnnotateControlFlow::doInitialization(Module &M) {
|
||||
|
||||
LLVMContext &Context = M.getContext();
|
||||
|
||||
Void = Type::getVoidTy(Context);
|
||||
Boolean = Type::getInt1Ty(Context);
|
||||
Int64 = Type::getInt64Ty(Context);
|
||||
ReturnStruct = StructType::get(Boolean, Int64, (Type *)0);
|
||||
|
||||
BoolTrue = ConstantInt::getTrue(Context);
|
||||
BoolFalse = ConstantInt::getFalse(Context);
|
||||
BoolUndef = UndefValue::get(Boolean);
|
||||
Int64Zero = ConstantInt::get(Int64, 0);
|
||||
|
||||
If = M.getOrInsertFunction(
|
||||
IfIntrinsic, ReturnStruct, Boolean, (Type *)0);
|
||||
|
||||
Else = M.getOrInsertFunction(
|
||||
ElseIntrinsic, ReturnStruct, Int64, (Type *)0);
|
||||
|
||||
Break = M.getOrInsertFunction(
|
||||
BreakIntrinsic, Int64, Int64, (Type *)0);
|
||||
|
||||
IfBreak = M.getOrInsertFunction(
|
||||
IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0);
|
||||
|
||||
ElseBreak = M.getOrInsertFunction(
|
||||
ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0);
|
||||
|
||||
Loop = M.getOrInsertFunction(
|
||||
LoopIntrinsic, Boolean, Int64, (Type *)0);
|
||||
|
||||
EndCf = M.getOrInsertFunction(
|
||||
EndCfIntrinsic, Void, Int64, (Type *)0);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Is BB the last block saved on the stack ?
|
||||
bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
|
||||
return Stack.back().first == BB;
|
||||
}
|
||||
|
||||
/// \brief Pop the last saved value from the control flow stack
|
||||
Value *SIAnnotateControlFlow::popSaved() {
|
||||
return Stack.pop_back_val().second;
|
||||
}
|
||||
|
||||
/// \brief Push a BB and saved value to the control flow stack
|
||||
void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
|
||||
Stack.push_back(std::make_pair(BB, Saved));
|
||||
}
|
||||
|
||||
/// \brief Can the condition represented by this PHI node treated like
|
||||
/// an "Else" block?
|
||||
bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
|
||||
|
||||
BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
|
||||
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
|
||||
if (Phi->getIncomingBlock(i) == IDom) {
|
||||
|
||||
if (Phi->getIncomingValue(i) != BoolTrue)
|
||||
return false;
|
||||
|
||||
} else {
|
||||
if (Phi->getIncomingValue(i) != BoolFalse)
|
||||
return false;
|
||||
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// \brief Erase "Phi" if it is not used any more
|
||||
void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
|
||||
if (!Phi->hasNUsesOrMore(1))
|
||||
Phi->eraseFromParent();
|
||||
}
|
||||
|
||||
/// \brief Open a new "If" block
|
||||
void SIAnnotateControlFlow::openIf(BranchInst *Term) {
|
||||
Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
|
||||
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
|
||||
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
|
||||
}
|
||||
|
||||
/// \brief Close the last "If" block and open a new "Else" block
|
||||
void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
|
||||
Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
|
||||
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
|
||||
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
|
||||
}
|
||||
|
||||
/// \brief Recursively handle the condition leading to a loop
|
||||
void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
|
||||
|
||||
if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
|
||||
|
||||
// Handle all non constant incoming values first
|
||||
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
|
||||
Value *Incoming = Phi->getIncomingValue(i);
|
||||
if (isa<ConstantInt>(Incoming))
|
||||
continue;
|
||||
|
||||
Phi->setIncomingValue(i, BoolFalse);
|
||||
handleLoopCondition(Incoming);
|
||||
}
|
||||
|
||||
BasicBlock *Parent = Phi->getParent();
|
||||
BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
|
||||
|
||||
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
|
||||
|
||||
Value *Incoming = Phi->getIncomingValue(i);
|
||||
if (Incoming != BoolTrue)
|
||||
continue;
|
||||
|
||||
BasicBlock *From = Phi->getIncomingBlock(i);
|
||||
if (From == IDom) {
|
||||
CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
|
||||
if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
|
||||
Value *Args[] = {
|
||||
OldEnd->getArgOperand(0),
|
||||
PhiInserter.GetValueAtEndOfBlock(Parent)
|
||||
};
|
||||
Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
|
||||
PhiInserter.AddAvailableValue(Parent, Ret);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TerminatorInst *Insert = From->getTerminator();
|
||||
Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
|
||||
Value *Ret = CallInst::Create(Break, Arg, "", Insert);
|
||||
PhiInserter.AddAvailableValue(From, Ret);
|
||||
}
|
||||
eraseIfUnused(Phi);
|
||||
|
||||
} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
|
||||
BasicBlock *Parent = Inst->getParent();
|
||||
TerminatorInst *Insert = Parent->getTerminator();
|
||||
Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
|
||||
Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
|
||||
PhiInserter.AddAvailableValue(Parent, Ret);
|
||||
|
||||
} else {
|
||||
assert(0 && "Unhandled loop condition!");
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Handle a back edge (loop)
|
||||
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
|
||||
|
||||
BasicBlock *Target = Term->getSuccessor(1);
|
||||
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
|
||||
|
||||
PhiInserter.Initialize(Int64, "");
|
||||
PhiInserter.AddAvailableValue(Target, Broken);
|
||||
|
||||
Value *Cond = Term->getCondition();
|
||||
Term->setCondition(BoolTrue);
|
||||
handleLoopCondition(Cond);
|
||||
|
||||
BasicBlock *BB = Term->getParent();
|
||||
Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
|
||||
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
|
||||
PI != PE; ++PI) {
|
||||
|
||||
Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
|
||||
}
|
||||
|
||||
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
|
||||
push(Term->getSuccessor(0), Arg);
|
||||
}
|
||||
|
||||
/// \brief Close the last opened control flow
|
||||
void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
|
||||
CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
|
||||
}
|
||||
|
||||
/// \brief Annotate the control flow with intrinsics so the backend can
|
||||
/// recognize if/then/else and loops.
|
||||
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
|
||||
|
||||
DT = &getAnalysis<DominatorTree>();
|
||||
|
||||
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
|
||||
E = df_end(&F.getEntryBlock()); I != E; ++I) {
|
||||
|
||||
BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
|
||||
|
||||
if (!Term || Term->isUnconditional()) {
|
||||
if (isTopOfStack(*I))
|
||||
closeControlFlow(*I);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (I.nodeVisited(Term->getSuccessor(1))) {
|
||||
if (isTopOfStack(*I))
|
||||
closeControlFlow(*I);
|
||||
handleLoop(Term);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isTopOfStack(*I)) {
|
||||
PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
|
||||
if (Phi && Phi->getParent() == *I && isElse(Phi)) {
|
||||
insertElse(Term);
|
||||
eraseIfUnused(Phi);
|
||||
continue;
|
||||
}
|
||||
closeControlFlow(*I);
|
||||
}
|
||||
openIf(Term);
|
||||
}
|
||||
|
||||
assert(Stack.empty());
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Create the annotation pass
|
||||
FunctionPass *llvm::createSIAnnotateControlFlowPass() {
|
||||
|
||||
return new SIAnnotateControlFlow();
|
||||
}
|
||||
@@ -1,179 +0,0 @@
|
||||
//===-- SIFixSGPRLiveness.cpp - SGPR liveness adjustment ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// SGPRs are not affected by control flow. This pass adjusts SGPR liveness
|
||||
/// so that the register allocator can still correctly allocate them.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class SIFixSGPRLiveness : public MachineFunctionPass {
|
||||
private:
|
||||
static char ID;
|
||||
|
||||
const TargetInstrInfo *TII;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineDominatorTree *MD;
|
||||
MachinePostDominatorTree *MPD;
|
||||
|
||||
bool isSGPR(const TargetRegisterClass *RegClass) {
|
||||
return RegClass == &AMDGPU::SReg_1RegClass ||
|
||||
RegClass == &AMDGPU::SReg_32RegClass ||
|
||||
RegClass == &AMDGPU::SReg_64RegClass ||
|
||||
RegClass == &AMDGPU::SReg_128RegClass ||
|
||||
RegClass == &AMDGPU::SReg_256RegClass;
|
||||
}
|
||||
|
||||
void addKill(MachineBasicBlock::iterator I, unsigned Reg);
|
||||
MachineBasicBlock *handleUses(unsigned VirtReg, MachineBasicBlock *Begin);
|
||||
void handlePreds(MachineBasicBlock *Begin, MachineBasicBlock *End,
|
||||
unsigned VirtReg);
|
||||
|
||||
bool handleVirtReg(unsigned VirtReg);
|
||||
|
||||
public:
|
||||
SIFixSGPRLiveness(TargetMachine &tm);
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "SI fix SGPR liveness pass";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char SIFixSGPRLiveness::ID = 0;
|
||||
|
||||
/// Takes the instruction info from the target machine and initializes the
/// LiveIntervals pass in the global pass registry.
SIFixSGPRLiveness::SIFixSGPRLiveness(TargetMachine &tm)
    : MachineFunctionPass(ID), TII(tm.getInstrInfo()) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeLiveIntervalsPass(Registry);
}
|
||||
|
||||
/// Requires the machine dominator and post dominator trees; the CFG is
/// declared preserved.
void SIFixSGPRLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
  // Analyses this pass depends on
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachinePostDominatorTree>();

  AU.setPreservesCFG();

  // Let the base class add its own requirements
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
||||
|
||||
/// Insert a KILL of Reg in front of the instruction I points to.
void SIFixSGPRLiveness::addKill(MachineBasicBlock::iterator I, unsigned Reg) {
  MachineBasicBlock &Block = *I->getParent();
  const MCInstrDesc &KillDesc = TII->get(TargetOpcode::KILL);

  BuildMI(Block, I, DebugLoc(), KillDesc).addReg(Reg);
}
|
||||
|
||||
// Find the common post dominator of all uses
|
||||
MachineBasicBlock *SIFixSGPRLiveness::handleUses(unsigned VirtReg,
|
||||
MachineBasicBlock *Begin) {
|
||||
MachineBasicBlock *LastUse = Begin, *End = Begin;
|
||||
bool EndUsesReg = true;
|
||||
|
||||
MachineRegisterInfo::use_iterator i, e;
|
||||
for (i = MRI->use_begin(VirtReg), e = MRI->use_end(); i != e; ++i) {
|
||||
MachineBasicBlock *MBB = i->getParent();
|
||||
if (LastUse == MBB)
|
||||
continue;
|
||||
|
||||
LastUse = MBB;
|
||||
MBB = MPD->findNearestCommonDominator(End, MBB);
|
||||
|
||||
if (MBB == LastUse)
|
||||
EndUsesReg = true;
|
||||
else if (MBB != End)
|
||||
EndUsesReg = false;
|
||||
|
||||
End = MBB;
|
||||
}
|
||||
|
||||
return EndUsesReg ? Begin : End;
|
||||
}
|
||||
|
||||
// Handles predecessors separately, only add KILLs to dominated ones
|
||||
void SIFixSGPRLiveness::handlePreds(MachineBasicBlock *Begin,
|
||||
MachineBasicBlock *End,
|
||||
unsigned VirtReg) {
|
||||
MachineBasicBlock::pred_iterator i, e;
|
||||
for (i = End->pred_begin(), e = End->pred_end(); i != e; ++i) {
|
||||
|
||||
if (MD->dominates(End, *i))
|
||||
continue; // ignore loops
|
||||
|
||||
if (MD->dominates(*i, Begin))
|
||||
continue; // too far up, abort search
|
||||
|
||||
if (MD->dominates(Begin, *i)) {
|
||||
// found end of livetime
|
||||
addKill((*i)->getFirstTerminator(), VirtReg);
|
||||
continue;
|
||||
}
|
||||
|
||||
handlePreds(Begin, *i, VirtReg);
|
||||
}
|
||||
}
|
||||
|
||||
bool SIFixSGPRLiveness::handleVirtReg(unsigned VirtReg) {
|
||||
|
||||
MachineInstr *Def = MRI->getVRegDef(VirtReg);
|
||||
if (!Def || MRI->use_empty(VirtReg))
|
||||
return false; // No definition or not used
|
||||
|
||||
MachineBasicBlock *Begin = Def->getParent();
|
||||
MachineBasicBlock *End = handleUses(VirtReg, Begin);
|
||||
if (Begin == End)
|
||||
return false; // Defined and only used in the same block
|
||||
|
||||
if (MD->dominates(Begin, End)) {
|
||||
// Lifetime dominate the end node, just kill it here
|
||||
addKill(End->getFirstNonPHI(), VirtReg);
|
||||
} else {
|
||||
// only some predecessors are dominate, handle them separately
|
||||
handlePreds(Begin, End, VirtReg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Run handleVirtReg() on every virtual register of an SGPR class.
///
/// Returns true if at least one register was changed.
bool SIFixSGPRLiveness::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  MD = &getAnalysis<MachineDominatorTree>();
  MPD = &getAnalysis<MachinePostDominatorTree>();

  bool Changed = false;
  for (unsigned Idx = 0, NumRegs = MRI->getNumVirtRegs(); Idx != NumRegs;
       ++Idx) {
    const unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);

    if (isSGPR(MRI->getRegClass(VirtReg)))
      Changed |= handleVirtReg(VirtReg);
  }
  return Changed;
}
|
||||
|
||||
/// Factory for the SGPR liveness pass; returns a newly allocated instance.
FunctionPass *llvm::createSIFixSGPRLivenessPass(TargetMachine &tm) {
  FunctionPass *FixLiveness = new SIFixSGPRLiveness(tm);
  return FixLiveness;
}
|
||||
@@ -44,8 +44,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::ADD, MVT::i64, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::i32, Legal);
|
||||
|
||||
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
// We need to custom lower loads from the USER_SGPR address space, so we can
|
||||
@@ -254,7 +252,7 @@ EVT SITargetLowering::getSetCCResultType(EVT VT) const {
|
||||
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
switch (Op.getOpcode()) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
|
||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||
case ISD::LOAD: return LowerLOAD(Op, DAG);
|
||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||
case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
|
||||
@@ -298,27 +296,99 @@ SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
|
||||
return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue CC = Op.getOperand(1);
|
||||
SDValue LHS = Op.getOperand(2);
|
||||
SDValue RHS = Op.getOperand(3);
|
||||
SDValue JumpT = Op.getOperand(4);
|
||||
SDValue CmpValue;
|
||||
SDValue Result;
|
||||
CmpValue = DAG.getNode(
|
||||
ISD::SETCC,
|
||||
Op.getDebugLoc(),
|
||||
MVT::i1,
|
||||
LHS, RHS,
|
||||
CC);
|
||||
/// \brief Helper function for LowerBRCOND
|
||||
static SDNode *findUser(SDValue Value, unsigned Opcode) {
|
||||
|
||||
Result = DAG.getNode(
|
||||
AMDGPUISD::BRANCH_COND,
|
||||
CmpValue.getDebugLoc(),
|
||||
MVT::Other, Chain,
|
||||
JumpT, CmpValue);
|
||||
return Result;
|
||||
SDNode *Parent = Value.getNode();
|
||||
for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
|
||||
I != E; ++I) {
|
||||
|
||||
if (I.getUse().get() != Value)
|
||||
continue;
|
||||
|
||||
if (I->getOpcode() == Opcode)
|
||||
return *I;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// This transforms the control flow intrinsics to get the branch destination as
|
||||
/// last parameter, also switches branch target with BR if the need arise
|
||||
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
|
||||
SelectionDAG &DAG) const {
|
||||
|
||||
DebugLoc DL = BRCOND.getDebugLoc();
|
||||
|
||||
SDNode *Intr = BRCOND.getOperand(1).getNode();
|
||||
SDValue Target = BRCOND.getOperand(2);
|
||||
SDNode *BR = 0;
|
||||
|
||||
if (Intr->getOpcode() == ISD::SETCC) {
|
||||
// As long as we negate the condition everything is fine
|
||||
SDNode *SetCC = Intr;
|
||||
assert(SetCC->getConstantOperandVal(1) == 1);
|
||||
|
||||
CondCodeSDNode *CC = cast<CondCodeSDNode>(SetCC->getOperand(2).getNode());
|
||||
assert(CC->get() == ISD::SETNE);
|
||||
Intr = SetCC->getOperand(0).getNode();
|
||||
|
||||
} else {
|
||||
// Get the target from BR if we don't negate the condition
|
||||
BR = findUser(BRCOND, ISD::BR);
|
||||
Target = BR->getOperand(1);
|
||||
}
|
||||
|
||||
assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
|
||||
|
||||
// Build the result and
|
||||
SmallVector<EVT, 4> Res;
|
||||
for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
|
||||
Res.push_back(Intr->getValueType(i));
|
||||
|
||||
// operands of the new intrinsic call
|
||||
SmallVector<SDValue, 4> Ops;
|
||||
Ops.push_back(BRCOND.getOperand(0));
|
||||
for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
|
||||
Ops.push_back(Intr->getOperand(i));
|
||||
Ops.push_back(Target);
|
||||
|
||||
// build the new intrinsic call
|
||||
SDNode *Result = DAG.getNode(
|
||||
Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
|
||||
DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
|
||||
|
||||
if (BR) {
|
||||
// Give the branch instruction our target
|
||||
SDValue Ops[] = {
|
||||
BR->getOperand(0),
|
||||
BRCOND.getOperand(2)
|
||||
};
|
||||
DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
|
||||
}
|
||||
|
||||
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
|
||||
|
||||
// Copy the intrinsic results to registers
|
||||
for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
|
||||
SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
|
||||
if (!CopyToReg)
|
||||
continue;
|
||||
|
||||
Chain = DAG.getCopyToReg(
|
||||
Chain, DL,
|
||||
CopyToReg->getOperand(1),
|
||||
SDValue(Result, i - 1),
|
||||
SDValue());
|
||||
|
||||
DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
|
||||
}
|
||||
|
||||
// Remove the old intrinsic from the chain
|
||||
DAG.ReplaceAllUsesOfValueWith(
|
||||
SDValue(Intr, Intr->getNumValues() - 1),
|
||||
Intr->getOperand(0));
|
||||
|
||||
return Chain;
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
@@ -43,9 +43,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
|
||||
SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
|
||||
unsigned VCCNode) const;
|
||||
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
public:
|
||||
SITargetLowering(TargetMachine &tm);
|
||||
|
||||
@@ -696,8 +696,9 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
|
||||
let isBranch = 1 in {
|
||||
def S_BRANCH : SOPP <
|
||||
0x00000002, (ins brtarget:$target), "S_BRANCH",
|
||||
[]
|
||||
>;
|
||||
[(br bb:$target)]> {
|
||||
let isBarrier = 1;
|
||||
}
|
||||
|
||||
let DisableEncoding = "$scc" in {
|
||||
def S_CBRANCH_SCC0 : SOPP <
|
||||
@@ -1095,26 +1096,70 @@ def SI_WQM : InstSI <
|
||||
|
||||
} // end usesCustomInserter
|
||||
|
||||
// SI Psuedo branch instructions. These are used by the CFG structurizer pass
|
||||
// SI Pseudo instructions. These are used by the CFG structurizer pass
|
||||
// and should be lowered to ISA instructions prior to codegen.
|
||||
|
||||
let isBranch = 1, isTerminator = 1, mayLoad = 0, mayStore = 0,
|
||||
hasSideEffects = 0 in {
|
||||
def SI_IF_NZ : InstSI <
|
||||
(outs),
|
||||
(ins brtarget:$target, SReg_1:$vcc),
|
||||
"SI_BRANCH_NZ",
|
||||
[(IL_brcond bb:$target, SReg_1:$vcc)]
|
||||
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
|
||||
Uses = [EXEC], Defs = [EXEC] in {
|
||||
|
||||
let isBranch = 1, isTerminator = 1 in {
|
||||
|
||||
// Pseudo selected from int_SI_if: takes an i1 condition and a branch
// target, produces a 64-bit scalar register.
def SI_IF : InstSI <
  (outs SReg_64:$dst), (ins SReg_1:$vcc, brtarget:$target),
  "SI_IF",
  [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))]
>;
|
||||
|
||||
def SI_IF_Z : InstSI <
|
||||
// Pseudo selected from int_SI_else. Source and destination are tied to the
// same register.
def SI_ELSE : InstSI <
  (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target),
  "SI_ELSE",
  [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]
> {
  let Constraints = "$src = $dst";
}
|
||||
|
||||
def SI_LOOP : InstSI <
|
||||
(outs),
|
||||
(ins brtarget:$target, SReg_1:$vcc),
|
||||
"SI_BRANCH_Z",
|
||||
[]
|
||||
(ins SReg_64:$saved, brtarget:$target),
|
||||
"SI_LOOP",
|
||||
[(int_SI_loop SReg_64:$saved, bb:$target)]
|
||||
>;
|
||||
} // end isBranch = 1, isTerminator = 1, mayLoad = 0, mayStore = 0,
|
||||
// hasSideEffects = 0
|
||||
|
||||
} // end isBranch = 1, isTerminator = 1
|
||||
|
||||
// Pseudo selected from int_SI_break. The asm string previously read
// "SI_ELSE" (copied from the SI_ELSE record above), which made the two
// pseudos indistinguishable in printed output.
def SI_BREAK : InstSI <
  (outs SReg_64:$dst),
  (ins SReg_64:$src),
  "SI_BREAK",
  [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
>;
|
||||
|
||||
// Pseudo selected from int_SI_if_break: an i1 condition plus a 64-bit
// scalar source, producing a 64-bit scalar result.
def SI_IF_BREAK : InstSI <
  (outs SReg_64:$dst), (ins SReg_1:$vcc, SReg_64:$src),
  "SI_IF_BREAK",
  [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))]
>;
|
||||
|
||||
// Pseudo selected from int_SI_else_break: two 64-bit scalar sources,
// producing a 64-bit scalar result.
def SI_ELSE_BREAK : InstSI <
  (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1),
  "SI_ELSE_BREAK",
  [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
>;
|
||||
|
||||
// Pseudo selected from int_SI_end_cf: consumes a 64-bit scalar register and
// produces no result.
def SI_END_CF : InstSI <
  (outs), (ins SReg_64:$saved),
  "SI_END_CF",
  [(int_SI_end_cf SReg_64:$saved)]
>;
|
||||
|
||||
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
|
||||
// Uses = [EXEC], Defs = [EXEC]
|
||||
|
||||
} // end IsCodeGenOnly, isPseudo
|
||||
|
||||
/* int_SI_vs_load_input */
|
||||
|
||||
@@ -39,4 +39,14 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
||||
|
||||
def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
|
||||
def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
/* Control flow Intrinsics */

// Result types first, then parameter types, then (empty) property lists.
def int_SI_if         : Intrinsic<[llvm_i64_ty],
                                  [llvm_i1_ty, llvm_empty_ty], []>;
def int_SI_else       : Intrinsic<[llvm_i64_ty],
                                  [llvm_i64_ty, llvm_empty_ty], []>;
def int_SI_break      : Intrinsic<[llvm_i64_ty],
                                  [llvm_i64_ty], []>;
def int_SI_if_break   : Intrinsic<[llvm_i64_ty],
                                  [llvm_i1_ty, llvm_i64_ty], []>;
def int_SI_else_break : Intrinsic<[llvm_i64_ty],
                                  [llvm_i64_ty, llvm_i64_ty], []>;
def int_SI_loop       : Intrinsic<[],
                                  [llvm_i64_ty, llvm_empty_ty], []>;
def int_SI_end_cf     : Intrinsic<[],
                                  [llvm_i64_ty], []>;
|
||||
}
|
||||
|
||||
@@ -8,10 +8,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief This pass lowers the pseudo control flow instructions (SI_IF_NZ, ELSE, ENDIF)
|
||||
/// to predicated instructions.
|
||||
/// \brief This pass lowers the pseudo control flow instructions to real
|
||||
/// machine instructions.
|
||||
///
|
||||
/// All control flow (except loops) is handled using predicated instructions and
|
||||
/// All control flow is handled using predicated instructions and
|
||||
/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
|
||||
/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
|
||||
/// by writing to the 64-bit EXEC register (each bit corresponds to a
|
||||
@@ -22,17 +22,17 @@
|
||||
///
|
||||
/// For example:
|
||||
/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
|
||||
/// SI_IF_NZ %VCC
|
||||
/// %SGPR0 = SI_IF %VCC
|
||||
/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
|
||||
/// ELSE
|
||||
/// %SGPR0 = SI_ELSE %SGPR0
|
||||
/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
|
||||
/// ENDIF
|
||||
/// SI_END_CF %SGPR0
|
||||
///
|
||||
/// becomes:
|
||||
///
|
||||
/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
|
||||
/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
|
||||
/// S_CBRANCH_EXECZ label0 // This instruction is an
|
||||
/// S_CBRANCH_EXECZ label0 // This instruction is an optional
|
||||
/// // optimization which allows us to
|
||||
/// // branch if all the bits of
|
||||
/// // EXEC are zero.
|
||||
@@ -45,7 +45,7 @@
|
||||
/// // instruction again.
|
||||
/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the THEN block
|
||||
/// label1:
|
||||
/// %EXEC = S_OR_B64 %EXEC, %SGPR2 // Re-enable saved exec mask bits
|
||||
/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
@@ -65,11 +65,14 @@ class SILowerControlFlowPass : public MachineFunctionPass {
|
||||
private:
|
||||
static char ID;
|
||||
const TargetInstrInfo *TII;
|
||||
std::vector<unsigned> PredicateStack;
|
||||
std::vector<unsigned> UnusedRegisters;
|
||||
|
||||
unsigned allocReg();
|
||||
void freeReg(unsigned Reg);
|
||||
void If(MachineInstr &MI);
|
||||
void Else(MachineInstr &MI);
|
||||
void Break(MachineInstr &MI);
|
||||
void IfBreak(MachineInstr &MI);
|
||||
void ElseBreak(MachineInstr &MI);
|
||||
void Loop(MachineInstr &MI);
|
||||
void EndCf(MachineInstr &MI);
|
||||
|
||||
public:
|
||||
SILowerControlFlowPass(TargetMachine &tm) :
|
||||
@@ -91,101 +94,199 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
|
||||
return new SILowerControlFlowPass(tm);
|
||||
}
|
||||
|
||||
/// \brief Lower an SI_IF pseudo instruction.
///
/// Operand 0 of \p MI is the register that receives the saved exec mask and
/// operand 1 is the condition register. Two real instructions are inserted in
/// front of \p MI, after which \p MI is removed from its block.
void SILowerControlFlowPass::If(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  const unsigned SavedExec = MI.getOperand(0).getReg();
  const unsigned Cond = MI.getOperand(1).getReg();

  // Save and update the exec mask.
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), SavedExec)
      .addReg(Cond);

  // Clear live bits from the saved exec mask.
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_XOR_B64), SavedExec)
      .addReg(AMDGPU::EXEC)
      .addReg(SavedExec);

  // The pseudo instruction has been fully replaced.
  MI.eraseFromParent();
}
|
||||
|
||||
/// \brief Lower an SI_ELSE pseudo instruction.
///
/// Operand 0 of \p MI is the destination register and operand 1 is the
/// previously saved exec mask. An S_OR_SAVEEXEC_B64 followed by an S_XOR_B64
/// on EXEC is inserted in front of \p MI, after which \p MI is removed.
void SILowerControlFlowPass::Else(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  const unsigned Result = MI.getOperand(0).getReg();
  const unsigned SavedExec = MI.getOperand(1).getReg();

  // Result = S_OR_SAVEEXEC_B64 <saved EXEC>
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Result)
      .addReg(SavedExec);

  // EXEC = S_XOR_B64 EXEC, Result
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
      .addReg(AMDGPU::EXEC)
      .addReg(Result);

  // The pseudo instruction has been fully replaced.
  MI.eraseFromParent();
}
|
||||
|
||||
/// \brief Lower an SI_BREAK pseudo instruction.
///
/// Replaces \p MI with a single S_OR_B64 that combines EXEC with the register
/// in operand 1 and writes the result to the register in operand 0.
void SILowerControlFlowPass::Break(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  const unsigned Result = MI.getOperand(0).getReg();
  const unsigned Incoming = MI.getOperand(1).getReg();

  // Result = S_OR_B64 EXEC, Incoming
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_B64), Result)
      .addReg(AMDGPU::EXEC)
      .addReg(Incoming);

  // The pseudo instruction has been fully replaced.
  MI.eraseFromParent();
}
|
||||
|
||||
/// \brief Lower an SI_IF_BREAK pseudo instruction.
///
/// Replaces \p MI with a single S_OR_B64 that combines the condition register
/// (operand 1) with the register in operand 2 and writes the result to the
/// register in operand 0.
void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  const unsigned Result = MI.getOperand(0).getReg();
  const unsigned Cond = MI.getOperand(1).getReg();
  const unsigned Incoming = MI.getOperand(2).getReg();

  // Result = S_OR_B64 Cond, Incoming
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_B64), Result)
      .addReg(Cond)
      .addReg(Incoming);

  // The pseudo instruction has been fully replaced.
  MI.eraseFromParent();
}
|
||||
|
||||
/// \brief Lower an SI_ELSE_BREAK pseudo instruction.
///
/// Replaces \p MI with a single S_OR_B64 that combines the registers in
/// operand 1 and operand 2 and writes the result to the register in
/// operand 0.
void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  const unsigned Result = MI.getOperand(0).getReg();
  const unsigned SavedMask = MI.getOperand(1).getReg();
  const unsigned Incoming = MI.getOperand(2).getReg();

  // Result = S_OR_B64 SavedMask, Incoming
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_OR_B64), Result)
      .addReg(SavedMask)
      .addReg(Incoming);

  // The pseudo instruction has been fully replaced.
  MI.eraseFromParent();
}
|
||||
|
||||
/// \brief Lower an SI_LOOP pseudo instruction.
///
/// Inserts an S_ANDN2_B64 that updates EXEC from EXEC and the register in
/// operand 0, followed by an S_CBRANCH_EXECNZ that receives operand 1 of
/// \p MI as its own operand. \p MI is removed afterwards.
void SILowerControlFlowPass::Loop(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  const unsigned Mask = MI.getOperand(0).getReg();

  // EXEC = S_ANDN2_B64 EXEC, Mask
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
      .addReg(AMDGPU::EXEC)
      .addReg(Mask);

  // Conditional branch on EXEC; operand 1 of the pseudo is forwarded as-is.
  BuildMI(Parent, &MI, Loc, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
      .addOperand(MI.getOperand(1))
      .addReg(AMDGPU::EXEC);

  // The pseudo instruction has been fully replaced.
  MI.eraseFromParent();
}
|
||||
|
||||
/// \brief Lower an SI_END_CF pseudo instruction.
///
/// Emits an S_OR_B64 that ORs the register in operand 0 back into EXEC. Unlike
/// the other lowerings in this pass, the new instruction is placed at the
/// first non-PHI position of the block rather than directly in front of
/// \p MI. \p MI is removed afterwards.
void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  const unsigned SavedExec = MI.getOperand(0).getReg();

  // Re-enable saved exec mask bits.
  BuildMI(Parent, Parent.getFirstNonPHI(), Loc,
          TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
      .addReg(AMDGPU::EXEC)
      .addReg(SavedExec);

  // The pseudo instruction has been fully replaced.
  MI.eraseFromParent();
}
|
||||
|
||||
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
// Find all the unused registers that can be used for the predicate stack.
|
||||
for (TargetRegisterClass::iterator I = AMDGPU::SReg_64RegClass.begin(),
|
||||
S = AMDGPU::SReg_64RegClass.end();
|
||||
I != S; ++I) {
|
||||
unsigned Reg = *I;
|
||||
if (!MF.getRegInfo().isPhysRegUsed(Reg)) {
|
||||
UnusedRegisters.insert(UnusedRegisters.begin(), Reg);
|
||||
}
|
||||
}
|
||||
bool HaveCf = false;
|
||||
|
||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; ++BI) {
|
||||
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
|
||||
I != MBB.end(); I = Next) {
|
||||
I != MBB.end(); I = Next) {
|
||||
|
||||
Next = llvm::next(I);
|
||||
MachineInstr &MI = *I;
|
||||
unsigned Reg;
|
||||
switch (MI.getOpcode()) {
|
||||
default: break;
|
||||
case AMDGPU::SI_IF_NZ:
|
||||
Reg = allocReg();
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
|
||||
Reg)
|
||||
.addOperand(MI.getOperand(0)); // VCC
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
|
||||
Reg)
|
||||
.addReg(Reg)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
MI.eraseFromParent();
|
||||
PredicateStack.push_back(Reg);
|
||||
case AMDGPU::SI_IF:
|
||||
If(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::ELSE:
|
||||
Reg = PredicateStack.back();
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
|
||||
Reg)
|
||||
.addReg(Reg);
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
|
||||
AMDGPU::EXEC)
|
||||
.addReg(Reg)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
MI.eraseFromParent();
|
||||
case AMDGPU::SI_ELSE:
|
||||
Else(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::ENDIF:
|
||||
Reg = PredicateStack.back();
|
||||
PredicateStack.pop_back();
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64),
|
||||
AMDGPU::EXEC)
|
||||
.addReg(AMDGPU::EXEC)
|
||||
.addReg(Reg);
|
||||
freeReg(Reg);
|
||||
case AMDGPU::SI_BREAK:
|
||||
Break(MI);
|
||||
break;
|
||||
|
||||
if (MF.getInfo<SIMachineFunctionInfo>()->ShaderType == ShaderType::PIXEL &&
|
||||
PredicateStack.empty()) {
|
||||
// If the exec mask is non-zero, skip the next two instructions
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
|
||||
.addImm(3)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
case AMDGPU::SI_IF_BREAK:
|
||||
IfBreak(MI);
|
||||
break;
|
||||
|
||||
// Exec mask is zero: Export to NULL target...
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::EXP))
|
||||
.addImm(0)
|
||||
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
|
||||
.addImm(0)
|
||||
.addImm(1)
|
||||
.addImm(1)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0);
|
||||
case AMDGPU::SI_ELSE_BREAK:
|
||||
ElseBreak(MI);
|
||||
break;
|
||||
|
||||
// ... and terminate wavefront
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
|
||||
}
|
||||
MI.eraseFromParent();
|
||||
case AMDGPU::SI_LOOP:
|
||||
Loop(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_END_CF:
|
||||
HaveCf = true;
|
||||
EndCf(MI);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: What is this good for?
|
||||
unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
|
||||
if (HaveCf && ShaderType == ShaderType::PIXEL) {
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; ++BI) {
|
||||
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
if (MBB.succ_empty()) {
|
||||
|
||||
MachineInstr &MI = *MBB.getFirstNonPHI();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
// If the exec mask is non-zero, skip the next two instructions
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
|
||||
.addImm(3)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
// Exec mask is zero: Export to NULL target...
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
|
||||
.addImm(0)
|
||||
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
|
||||
.addImm(0)
|
||||
.addImm(1)
|
||||
.addImm(1)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0);
|
||||
|
||||
// ... and terminate wavefront
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Take one register out of the UnusedRegisters pool.
///
/// Asserts that the pool is not empty, then removes and returns its last
/// entry.
unsigned SILowerControlFlowPass::allocReg() {
  assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");

  // Hand out the entry at the back of the pool and drop it from the pool.
  const unsigned Picked = UnusedRegisters.back();
  UnusedRegisters.pop_back();
  return Picked;
}
|
||||
|
||||
/// \brief Return \p Reg to the UnusedRegisters pool.
///
/// The register is appended at the back of the pool.
void SILowerControlFlowPass::freeReg(unsigned Reg) {
  UnusedRegisters.insert(UnusedRegisters.end(), Reg);
}
|
||||
|
||||
Reference in New Issue
Block a user