mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 00:32:55 +00:00
[SLP] Re-enable vectorization of GEP expressions (re-apply r210342 with a fix).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216549 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fda6b888a9
commit
b8c95a89e6
@ -1179,6 +1179,64 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
}
|
||||
return;
|
||||
}
|
||||
case Instruction::GetElementPtr: {
|
||||
// We don't combine GEPs with complicated (nested) indexing.
|
||||
for (unsigned j = 0; j < VL.size(); ++j) {
|
||||
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
|
||||
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
|
||||
BS.cancelScheduling(VL);
|
||||
newTreeEntry(VL, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// We combine only GEPs with a single use.
|
||||
for (unsigned j = 0; j < VL.size(); ++j) {
|
||||
if (cast<Instruction>(VL[j])->getNumUses() > 1) {
|
||||
DEBUG(dbgs() << "SLP: not-vectorizable GEP (multiple uses).\n");
|
||||
BS.cancelScheduling(VL);
|
||||
newTreeEntry(VL, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// We can't combine several GEPs into one vector if they operate on
|
||||
// different types.
|
||||
Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
|
||||
for (unsigned j = 0; j < VL.size(); ++j) {
|
||||
Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
|
||||
if (Ty0 != CurTy) {
|
||||
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
|
||||
BS.cancelScheduling(VL);
|
||||
newTreeEntry(VL, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// We don't combine GEPs with non-constant indexes.
|
||||
for (unsigned j = 0; j < VL.size(); ++j) {
|
||||
auto Op = cast<Instruction>(VL[j])->getOperand(1);
|
||||
if (!isa<ConstantInt>(Op)) {
|
||||
DEBUG(
|
||||
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
|
||||
BS.cancelScheduling(VL);
|
||||
newTreeEntry(VL, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
newTreeEntry(VL, true);
|
||||
DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
|
||||
for (unsigned i = 0, e = 2; i < e; ++i) {
|
||||
ValueList Operands;
|
||||
// Prepare the operand vector.
|
||||
for (unsigned j = 0; j < VL.size(); ++j)
|
||||
Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
|
||||
|
||||
buildTree_rec(Operands, Depth + 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
// Check if the stores are consecutive or if we need to swizzle them.
|
||||
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
|
||||
@ -1416,6 +1474,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
}
|
||||
return VecCost - ScalarCost;
|
||||
}
|
||||
case Instruction::GetElementPtr: {
|
||||
TargetTransformInfo::OperandValueKind Op1VK =
|
||||
TargetTransformInfo::OK_AnyValue;
|
||||
TargetTransformInfo::OperandValueKind Op2VK =
|
||||
TargetTransformInfo::OK_UniformConstantValue;
|
||||
|
||||
int ScalarCost =
|
||||
VecTy->getNumElements() *
|
||||
TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
|
||||
int VecCost =
|
||||
TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
|
||||
|
||||
return VecCost - ScalarCost;
|
||||
}
|
||||
case Instruction::Load: {
|
||||
// Cost of wide load - cost of scalar loads.
|
||||
int ScalarLdCost = VecTy->getNumElements() *
|
||||
@ -1982,6 +2054,35 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
++NumVectorInstructions;
|
||||
return propagateMetadata(S, E->Scalars);
|
||||
}
|
||||
case Instruction::GetElementPtr: {
|
||||
setInsertPointAfterBundle(E->Scalars);
|
||||
|
||||
ValueList Op0VL;
|
||||
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
|
||||
Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
|
||||
|
||||
Value *Op0 = vectorizeTree(Op0VL);
|
||||
|
||||
std::vector<Value *> OpVecs;
|
||||
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
|
||||
++j) {
|
||||
ValueList OpVL;
|
||||
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
|
||||
OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
|
||||
|
||||
Value *OpVec = vectorizeTree(OpVL);
|
||||
OpVecs.push_back(OpVec);
|
||||
}
|
||||
|
||||
Value *V = Builder.CreateGEP(Op0, OpVecs);
|
||||
E->VectorizedValue = V;
|
||||
++NumVectorInstructions;
|
||||
|
||||
if (Instruction *I = dyn_cast<Instruction>(V))
|
||||
return propagateMetadata(I, E->Scalars);
|
||||
|
||||
return V;
|
||||
}
|
||||
case Instruction::Call: {
|
||||
CallInst *CI = cast<CallInst>(VL0);
|
||||
setInsertPointAfterBundle(E->Scalars);
|
||||
|
41
test/Transforms/SLPVectorizer/X86/gep.ll
Normal file
41
test/Transforms/SLPVectorizer/X86/gep.ll
Normal file
@ -0,0 +1,41 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Test if SLP can handle GEP expressions.
|
||||
; The test performs the following actions:
|
||||
; x->first = y->first + 16
|
||||
; x->second = y->second + 16
|
||||
|
||||
; CHECK-LABEL: foo1
|
||||
; CHECK: <2 x i32*>
|
||||
define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
|
||||
%1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
|
||||
%2 = load i32** %1, align 8
|
||||
%3 = getelementptr inbounds i32* %2, i64 16
|
||||
%4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
|
||||
store i32* %3, i32** %4, align 8
|
||||
%5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
|
||||
%6 = load i32** %5, align 8
|
||||
%7 = getelementptr inbounds i32* %6, i64 16
|
||||
%8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
|
||||
store i32* %7, i32** %8, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that we don't vectorize GEP expressions if indexes are not constants.
|
||||
; We can't produce efficient code in that case.
|
||||
; CHECK-LABEL: foo2
|
||||
; CHECK-NOT: <2 x i32*>
|
||||
define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
|
||||
%1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
|
||||
%2 = load i32** %1, align 8
|
||||
%3 = getelementptr inbounds i32* %2, i32 %i
|
||||
%4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
|
||||
store i32* %3, i32** %4, align 8
|
||||
%5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
|
||||
%6 = load i32** %5, align 8
|
||||
%7 = getelementptr inbounds i32* %6, i32 %i
|
||||
%8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
|
||||
store i32* %7, i32** %8, align 8
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user