SLPVectorizer: support slp-vectorization of PHINodes between basic blocks

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184888 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem
2013-06-25 23:04:09 +00:00
parent b1c0cc22dd
commit 805e8a01fe
2 changed files with 142 additions and 1 deletions

View File

@ -239,6 +239,10 @@ public:
/// NOTICE: The vectorization methods also use this set.
ValueSet MustGather;
/// Contains PHINodes that are being processed. We use this data structure
/// to stop cycles in the graph.
ValueSet VisitedPHIs;
/// Contains a list of values that are used outside the current tree. This
/// set must be reset between runs.
SetVector<Value *> MultiUserVals;
@ -457,13 +461,31 @@ void FuncSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Mark instructions with multiple users.
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
if (PHINode *PN = dyn_cast<PHINode>(VL[i])) {
unsigned NumUses = 0;
// Check that PHINodes have only one external (non-self) use.
for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
U != UE; ++U) {
// Don't count self uses.
if (*U == PN)
continue;
NumUses++;
}
if (NumUses > 1) {
DEBUG(dbgs() << "SLP: Adding PHI to MultiUserVals "
"because it has " << NumUses << " users:" << *PN << " \n");
MultiUserVals.insert(PN);
}
continue;
}
Instruction *I = dyn_cast<Instruction>(VL[i]);
// Remember to check if all of the users of this instruction are vectorized
// within our tree. At depth zero we have no local users, only external
// users that we don't care about.
if (Depth && I && I->getNumUses() > 1) {
DEBUG(dbgs() << "SLP: Adding to MultiUserVals "
"because it has multiple users:" << *I << " \n");
"because it has " << I->getNumUses() << " users:" << *I << " \n");
MultiUserVals.insert(I);
}
}
@ -483,6 +505,24 @@ void FuncSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
return MustGather.insert(VL.begin(), VL.end());
switch (Opcode) {
case Instruction::PHI: {
PHINode *PH = dyn_cast<PHINode>(VL0);
// Stop self cycles.
if (VisitedPHIs.count(PH))
return;
VisitedPHIs.insert(PH);
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j)
Operands.push_back(cast<PHINode>(VL[j])->getIncomingValue(i));
getTreeUses_rec(Operands, Depth + 1);
}
return;
}
case Instruction::ExtractElement: {
VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());
// No need to follow ExtractElements that are going to be optimized away.
@ -640,6 +680,35 @@ int FuncSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
Instruction *VL0 = cast<Instruction>(VL[0]);
switch (Opcode) {
case Instruction::PHI: {
PHINode *PH = dyn_cast<PHINode>(VL0);
// Stop self cycles.
if (VisitedPHIs.count(PH))
return 0;
VisitedPHIs.insert(PH);
int TotalCost = 0;
// Calculate the cost of all of the operands.
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j)
Operands.push_back(cast<PHINode>(VL[j])->getIncomingValue(i));
int Cost = getTreeCost_rec(Operands, Depth + 1);
if (Cost == MAX_COST)
return MAX_COST;
TotalCost += TotalCost;
}
if (TotalCost > GatherCost) {
MustGather.insert(VL.begin(), VL.end());
return GatherCost;
}
return TotalCost;
}
case Instruction::ExtractElement: {
if (CanReuseExtract(VL, VL.size(), VecTy))
return 0;
@ -806,6 +875,7 @@ int FuncSLP::getTreeCost(ArrayRef<Value *> VL) {
LaneMap.clear();
MultiUserVals.clear();
MustGather.clear();
VisitedPHIs.clear();
if (!getSameBlock(VL))
return MAX_COST;
@ -990,6 +1060,30 @@ Value *FuncSLP::vectorizeTree_rec(ArrayRef<Value *> VL) {
assert(Opcode == getSameOpcode(VL) && "Invalid opcode");
switch (Opcode) {
case Instruction::PHI: {
PHINode *PH = dyn_cast<PHINode>(VL0);
Builder.SetInsertPoint(PH->getParent()->getFirstInsertionPt());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
VectorizedValues[VL0] = NewPhi;
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
BasicBlock *IBB = PH->getIncomingBlock(i);
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j)
Operands.push_back(cast<PHINode>(VL[j])->getIncomingValueForBlock(IBB));
Builder.SetInsertPoint(IBB->getTerminator());
Value *Vec = vectorizeTree_rec(Operands);
NewPhi->addIncoming(Vec, IBB);
}
assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
"Invalid number of incoming values");
return NewPhi;
}
case Instruction::ExtractElement: {
if (CanReuseExtract(VL, VL.size(), VecTy))
return VL0->getOperand(0);
@ -1150,6 +1244,7 @@ Value *FuncSLP::vectorizeTree(ArrayRef<Value *> VL) {
BlocksNumbers[it].forget();
// Clear the state.
MustGather.clear();
VisitedPHIs.clear();
VectorizedValues.clear();
MemBarrierIgnoreList.clear();
return V;