SLPVectorizer: Add support for trees with external users.

For example:
bar() {
  int a = A[i];
  int b = A[i+1];
  B[i] = a;
  B[i+1] = b;
  foo(a);  <--- a is used outside the vectorized expression.
}



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181648 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem
2013-05-10 22:59:33 +00:00
parent 09ec4b2164
commit 9bba9f6300
3 changed files with 60 additions and 13 deletions

View File

@@ -243,6 +243,10 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
LaneMap.clear();
MultiUserVals.clear();
MustScalarize.clear();
MustExtract.clear();
// Find the location of the last root.
unsigned LastRootIndex = InstrIdx[GetLastInstr(VL, VL.size())];
// Scan the tree and find which value is used by which lane, and which values
// must be scalarized.
@@ -258,15 +262,31 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
I != E; ++I) {
if (LaneMap.find(*I) == LaneMap.end()) {
MustScalarize.insert(*it);
DEBUG(dbgs()<<"SLP: Adding " << **it <<
" to MustScalarize because of an out of tree usage.\n");
break;
DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
// We don't have an ordering problem if the user is not in this basic
// block.
Instruction *Inst = cast<Instruction>(*I);
if (Inst->getParent() == BB) {
// We don't have an ordering problem if the user is after the last
// root.
unsigned Idx = InstrIdx[Inst];
if (Idx < LastRootIndex) {
MustScalarize.insert(*it);
DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
"because of an unsafe out of tree usage.\n");
break;
}
}
DEBUG(dbgs()<<"SLP: Adding to MustExtract "
"because of a safe out of tree usage.\n");
MustExtract.insert(*it);
}
if (Lane == -1) Lane = LaneMap[*I];
if (Lane != LaneMap[*I]) {
MustScalarize.insert(*it);
DEBUG(dbgs()<<"Adding " << **it <<
DEBUG(dbgs()<<"SLP: Adding " << **it <<
" to MustScalarize because multiple lane use it: "
<< Lane << " and " << LaneMap[*I] << ".\n");
break;
@@ -456,6 +476,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
// Calculate the extract cost.
unsigned ExternalUserExtractCost = 0;
for (unsigned i = 0, e = VL.size(); i < e; ++i)
if (MustExtract.count(VL[i]))
ExternalUserExtractCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -469,7 +496,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
int Cost = 0;
int Cost = ExternalUserExtractCost;
ValueList Operands;
Type *SrcTy = VL0->getOperand(0)->getType();
// Prepare the operand vector.
@@ -510,7 +537,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
int Cost = 0;
int Cost = ExternalUserExtractCost;
// Calculate the cost of all of the operands.
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -540,7 +567,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
return VecLdCost - ScalarLdCost;
return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
@@ -556,7 +583,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
return TotalCost;
return TotalCost + ExternalUserExtractCost;
}
default:
// Unable to vectorize unknown instructions.
@@ -588,10 +615,24 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
Value *V = vectorizeTree_rec(VL, VF);
Instruction *LastInstr = GetLastInstr(VL, VL.size());
IRBuilder<> Builder(LastInstr);
for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end();
it != e; ++it) {
Instruction *I = cast<Instruction>(*it);
Value *Vec = VectorizedValues[I];
assert(LaneMap.count(I) && "Unable to find the lane for the external use");
Value *Idx = Builder.getInt32(LaneMap[I]);
Value *Extract = Builder.CreateExtractElement(Vec, Idx);
I->replaceAllUsesWith(Extract);
}
// We moved some instructions around. We have to number them again
// before we can do any analysis.
numberInstructions();
MustScalarize.clear();
MustExtract.clear();
return V;
}