Teach the cost model about the optimization in r169904: Truncation of induction variables costs the same as scalar trunc.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170051 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2012-12-13 00:21:03 +00:00
parent 0fabd08066
commit 807dad62a0
3 changed files with 21 additions and 3 deletions

View File

@ -106,8 +106,7 @@ struct LoopVectorize : public LoopPass {
}
DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
L->getHeader()->getParent()->getParent()->getModuleIdentifier()<<
"\n");
F->getParent()->getModuleIdentifier()<<"\n");
// If we decided that it is *legal* to vectorize the loop then do it.
InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF);
@ -1849,6 +1848,15 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
return NoInduction;
}
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
Value *In0 = const_cast<Value*>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
if (!PN)
return false;
return Inductions.count(PN);
}
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
assert(TheLoop->contains(BB) && "Unknown block used");
@ -2110,6 +2118,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
// We optimize the truncation of induction variables.
// The cost of such a truncation is the same as the scalar operation.
if (I->getOpcode() == Instruction::Trunc &&
Legal->isInductionVariable(I->getOperand(0)))
return VTTI->getCastInstrCost(I->getOpcode(), I->getType(),
I->getOperand(0)->getType());
Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}

View File

@ -320,6 +320,9 @@ public:
/// Gives access to the induction variables found in the loop. The returned
/// pointer refers to this object's own Inductions member, not to a copy.
InductionList *getInductionVars() {
  return &Inductions;
}
/// Returns true if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
bool blockNeedsPredication(BasicBlock *BB);

View File

@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
;CHECK: @conversion_cost1
;CHECK: store <2 x i8>
;CHECK: store <8 x i8>
;CHECK: ret
define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 3