[SLP] Re-enable vectorization of GEP expressions (re-apply r210342 with a fix).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216549 91177308-0d34-0410-b5e6-96231b3b80d8
Michael Zolotukhin 2014-08-27 15:01:18 +00:00
parent fda6b888a9
commit b8c95a89e6
2 changed files with 142 additions and 0 deletions
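In short, the change lets the SLP vectorizer bundle GetElementPtr instructions, so that several scalar address computations become a single GEP over a vector of pointers. As a rough sketch (not taken from the commit; value names are invented), two GEPs of the form

    %p0 = getelementptr inbounds i32* %a, i64 16
    %p1 = getelementptr inbounds i32* %b, i64 16

can be replaced by one instruction that indexes a <2 x i32*> of base pointers at once:

    %vp = getelementptr <2 x i32*> %bases, <2 x i64> <i64 16, i64 16>

The diff below adds the bundling checks, a cost-model entry, and the code generation for this case, plus a regression test.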


@@ -1179,6 +1179,64 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
return;
}
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
// We combine only GEPs with a single use.
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumUses() > 1) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (multiple uses).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
// We can't combine several GEPs into one vector if they operate on
// different types.
Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
for (unsigned j = 0; j < VL.size(); ++j) {
Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
if (Ty0 != CurTy) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
// We don't combine GEPs with non-constant indexes.
for (unsigned j = 0; j < VL.size(); ++j) {
auto Op = cast<Instruction>(VL[j])->getOperand(1);
if (!isa<ConstantInt>(Op)) {
DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
}
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j)
Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
buildTree_rec(Operands, Depth + 1);
}
return;
}
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
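To illustrate the first check above (an invented example, not from the commit): a struct-field address such as

    %f = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0

has three operands, so a bundle containing it takes the "nested indexes" bail-out: scheduling is cancelled and the bundle is recorded as non-vectorizable. Only plain pointer-plus-one-index GEPs, each with a single use, matching pointer operand types, and a constant index, reach newTreeEntry(VL, true).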
@@ -1416,6 +1474,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return VecCost - ScalarCost;
}
case Instruction::GetElementPtr: {
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
int ScalarCost =
VecTy->getNumElements() *
TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
int VecCost =
TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
return VecCost - ScalarCost;
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
int ScalarLdCost = VecTy->getNumElements() *
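The cost entry above models each scalar GEP as one address add per lane and the vectorized bundle as a single vector-wide add. On a hypothetical target where both a scalar add and a two-lane vector add cost 1, a two-lane GEP bundle returns 1 - 2 = -1, i.e. bundling the GEPs is counted as saving one instruction.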
@@ -1982,6 +2054,35 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
++NumVectorInstructions;
return propagateMetadata(S, E->Scalars);
}
case Instruction::GetElementPtr: {
setInsertPointAfterBundle(E->Scalars);
ValueList Op0VL;
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
Value *Op0 = vectorizeTree(Op0VL);
std::vector<Value *> OpVecs;
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
++j) {
ValueList OpVL;
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
Value *OpVec = vectorizeTree(OpVL);
OpVecs.push_back(OpVec);
}
Value *V = Builder.CreateGEP(Op0, OpVecs);
E->VectorizedValue = V;
++NumVectorInstructions;
if (Instruction *I = dyn_cast<Instruction>(V))
return propagateMetadata(I, E->Scalars);
return V;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E->Scalars);

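Applied to foo1 in the test below, the code above gathers Op0VL = {%2, %6} (the loaded base pointers) and a single index bundle {16, 16}; vectorizeTree turns those into a <2 x i32*> value and (roughly) a <2 x i64> <i64 16, i64 16> constant, Builder.CreateGEP stitches them into one vector GEP, and the scalar GEPs' metadata is propagated onto the result.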

@@ -0,0 +1,41 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Test if SLP can handle GEP expressions.
; The test performs the following actions:
; x->first = y->first + 16
; x->second = y->second + 16
; CHECK-LABEL: foo1
; CHECK: <2 x i32*>
define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
%1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
%2 = load i32** %1, align 8
%3 = getelementptr inbounds i32* %2, i64 16
%4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
store i32* %3, i32** %4, align 8
%5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
%6 = load i32** %5, align 8
%7 = getelementptr inbounds i32* %6, i64 16
%8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
store i32* %7, i32** %8, align 8
ret void
}
; Test that we don't vectorize GEP expressions if indexes are not constants.
; We can't produce efficient code in that case.
; CHECK-LABEL: foo2
; CHECK-NOT: <2 x i32*>
define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
%1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
%2 = load i32** %1, align 8
%3 = getelementptr inbounds i32* %2, i32 %i
%4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
store i32* %3, i32** %4, align 8
%5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
%6 = load i32** %5, align 8
%7 = getelementptr inbounds i32* %6, i32 %i
%8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
store i32* %7, i32** %8, align 8
ret void
}
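For reference, the vectorized body of foo1 that the <2 x i32*> check anticipates looks roughly like the following (a hand-written sketch, not actual opt output; value names are invented):

    %py = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
    %pyc = bitcast i32** %py to <2 x i32*>*
    %vld = load <2 x i32*>* %pyc, align 8
    %vgep = getelementptr <2 x i32*> %vld, <2 x i64> <i64 16, i64 16>
    %px = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
    %pxc = bitcast i32** %px to <2 x i32*>*
    store <2 x i32*> %vgep, <2 x i32*>* %pxc, align 8
    ret void

In foo2 the variable index %i trips the non-constant-index check, so the scalars are left alone, which is what the CHECK-NOT line verifies.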