Refactor 'vectorizeLoop' no functionality change.

This patch merges LoopVectorize of InnerLoopVectorizer and InnerLoopUnroller by adding checks for VF=1. This helps in erasing the Unroller code that is almost identical to the InnerLoopVectorizer code.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189391 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2013-08-27 18:52:47 +00:00
parent 0b90c6223d
commit bd28f5c856

View File

@ -354,7 +354,6 @@ public:
InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
private:
virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
virtual void scalarizeInstruction(Instruction *Instr);
virtual void vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality *Legal);
@ -2049,18 +2048,31 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
// MinMax reduction have the start value as their identify.
VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
"minmax.ident");
if (VF == 1) {
VectorStart = Identity = RdxDesc.StartValue;
} else {
VectorStart = Identity = Builder.CreateVectorSplat(VF,
RdxDesc.StartValue,
"minmax.ident");
}
} else {
// Handle other reduction kinds:
Constant *Iden =
LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
VecTy->getScalarType());
Identity = ConstantVector::getSplat(VF, Iden);
LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
VecTy->getScalarType());
if (VF == 1) {
Identity = Iden;
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
VectorStart = RdxDesc.StartValue;
} else {
Identity = ConstantVector::getSplat(VF, Iden);
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
VectorStart = Builder.CreateInsertElement(Identity,
RdxDesc.StartValue, Zero);
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
VectorStart = Builder.CreateInsertElement(Identity,
RdxDesc.StartValue, Zero);
}
}
// Fix the vector-loop phi.
@ -2116,37 +2128,40 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ReducedPartRdx, RdxParts[part]);
}
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
// round.
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = ReducedPartRdx;
SmallVector<Constant*, 32> ShuffleMask(VF, 0);
for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i/2; ++j)
ShuffleMask[j] = Builder.getInt32(i/2 + j);
if (VF > 1) {
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
// round.
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = ReducedPartRdx;
SmallVector<Constant*, 32> ShuffleMask(VF, 0);
for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i/2; ++j)
ShuffleMask[j] = Builder.getInt32(i/2 + j);
// Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
UndefValue::get(Builder.getInt32Ty()));
// Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
UndefValue::get(Builder.getInt32Ty()));
Value *Shuf =
Value *Shuf =
Builder.CreateShuffleVector(TmpVec,
UndefValue::get(TmpVec->getType()),
ConstantVector::get(ShuffleMask),
"rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
else
TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
}
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
else
TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
}
// The result is in the first element of the vector.
Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
// The result is in the first element of the vector.
ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
Builder.getInt32(0));
}
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
@ -2165,7 +2180,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
// Add an edge coming from the bypass.
LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
}// end of the LCSSA phi scan.
@ -2177,7 +2192,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
@ -4788,155 +4803,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
return false;
}
void
InnerLoopUnroller::vectorizeLoop(LoopVectorizationLegality *Legal) {
// In order to support reduction variables we need to be able to unroll
// Phi nodes. Phi nodes have cycles, so we need to unroll them in two
// stages. See InnerLoopVectorizer::vectorizeLoop for more details.
PhiVector RdxPHIsToFix;
// Scan the loop in a topological order to ensure that defs are vectorized
// before users.
LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);
// Unroll all of the blocks in the original loop.
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
bb != be; ++bb)
vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
// Create the 'reduced' values for each of the induction vars.
// The reduced values are the vector values that we scalarize and combine
// after the loop is finished.
for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
it != e; ++it) {
PHINode *RdxPhi = *it;
assert(RdxPhi && "Unable to recover vectorized PHI");
// Find the reduction variable descriptor.
assert(Legal->getReductionVars()->count(RdxPhi) &&
"Unable to find the reduction variable");
LoopVectorizationLegality::ReductionDescriptor RdxDesc =
(*Legal->getReductionVars())[RdxPhi];
setDebugLocFromInst(Builder, RdxDesc.StartValue);
// We need to generate a reduction vector from the incoming scalar.
// To do so, we need to generate the 'identity' vector and overide
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
Type *VecTy = VectorExit[0]->getType();
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
Value *Identity;
Value *VectorStart;
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
// MinMax reduction have the start value as their identify.
VectorStart = Identity = RdxDesc.StartValue;
} else {
Identity = LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
VecTy->getScalarType());
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
VectorStart = RdxDesc.StartValue;
}
// Fix the vector-loop phi.
// We created the induction variable so we know that the
// preheader is the first entry.
BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
BasicBlock *Latch = OrigLoop->getLoopLatch();
Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
VectorParts &Val = getVectorValue(LoopVal);
for (unsigned part = 0; part < UF; ++part) {
// Make sure to add the reduction stat value only to the
// first unroll part.
Value *StartVal = (part == 0) ? VectorStart : Identity;
cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
}
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
// instructions.
Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
VectorParts RdxParts;
setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
for (unsigned part = 0; part < UF; ++part) {
// This PHINode contains the vectorized reduction variable, or
// the initial value vector, if we bypass the vector loop.
VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
RdxParts.push_back(NewPhi);
}
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = getReductionBinOp(RdxDesc.Kind);
setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
RdxParts[part], ReducedPartRdx,
"bin.rdx");
else
ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
ReducedPartRdx, RdxParts[part]);
}
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
// We know that the loop is in LCSSA form. We need to update the
// PHI nodes in the exit blocks.
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
if (!LCSSAPhi) continue;
// All PHINodes need to have a single entry edge, or two if
// we already fixed them.
assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
// We found our reduction value exit-PHI. Update it with the
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
// Add an edge coming from the bypass.
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
}// end of the LCSSA phi scan.
// Fix the scalar loop reduction variable with the incoming reduction sum
// from the vector body and from the backedge value.
int IncomingEdgeBlockIdx =
(RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
fixLCSSAPHIs();
}
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");