[SLP] Re-enable vectorization of GEP expressions (re-apply r210342 with a fix).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216549 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Zolotukhin
2014-08-27 15:01:18 +00:00
parent fda6b888a9
commit b8c95a89e6
2 changed files with 142 additions and 0 deletions

View File

@ -1179,6 +1179,64 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
return;
}
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
// We combine only GEPs with a single use.
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumUses() > 1) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (multiple uses).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
// We can't combine several GEPs into one vector if they operate on
// different types.
Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
for (unsigned j = 0; j < VL.size(); ++j) {
Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
if (Ty0 != CurTy) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
// We don't combine GEPs with non-constant indexes.
for (unsigned j = 0; j < VL.size(); ++j) {
auto Op = cast<Instruction>(VL[j])->getOperand(1);
if (!isa<ConstantInt>(Op)) {
DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j)
Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
buildTree_rec(Operands, Depth + 1);
}
return;
}
case Instruction::Store: {
// Check if the stores are consecutive or of we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
@ -1416,6 +1474,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return VecCost - ScalarCost;
}
case Instruction::GetElementPtr: {
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
int ScalarCost =
VecTy->getNumElements() *
TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
int VecCost =
TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
return VecCost - ScalarCost;
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
int ScalarLdCost = VecTy->getNumElements() *
@ -1982,6 +2054,35 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
++NumVectorInstructions;
return propagateMetadata(S, E->Scalars);
}
case Instruction::GetElementPtr: {
setInsertPointAfterBundle(E->Scalars);
ValueList Op0VL;
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
Value *Op0 = vectorizeTree(Op0VL);
std::vector<Value *> OpVecs;
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
++j) {
ValueList OpVL;
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
Value *OpVec = vectorizeTree(OpVL);
OpVecs.push_back(OpVec);
}
Value *V = Builder.CreateGEP(Op0, OpVecs);
E->VectorizedValue = V;
++NumVectorInstructions;
if (Instruction *I = dyn_cast<Instruction>(V))
return propagateMetadata(I, E->Scalars);
return V;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E->Scalars);