BBVectorize: Cache fixed-order pairs instead of recomputing pointer info.

Instead of recomputing relative pointer information just prior to fusing,
cache this information (which also needs to be computed during the
candidate-pair selection process). This cuts down on the total number of
SE queries made, and also is a necessary intermediate step on the road toward
including shuffle costs in the pair selection procedure.

No functionality change is intended.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167049 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel
2012-10-30 20:17:37 +00:00
parent 2f34d754d0
commit a9779bfbc9

View File

@ -216,6 +216,7 @@ namespace {
bool getCandidatePairs(BasicBlock &BB, bool getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start, BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs, std::multimap<Value *, Value *> &CandidatePairs,
DenseSet<ValuePair> &FixedOrderPairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings, DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len); std::vector<Value *> &PairableInsts, bool NonPow2Len);
@ -237,13 +238,14 @@ namespace {
void fuseChosenPairs(BasicBlock &BB, void fuseChosenPairs(BasicBlock &BB,
std::vector<Value *> &PairableInsts, std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *>& ChosenPairs); DenseMap<Value *, Value *>& ChosenPairs,
DenseSet<ValuePair> &FixedOrderPairs);
bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
bool areInstsCompatible(Instruction *I, Instruction *J, bool areInstsCompatible(Instruction *I, Instruction *J,
bool IsSimpleLoadStore, bool NonPow2Len, bool IsSimpleLoadStore, bool NonPow2Len,
int &CostSavings); int &CostSavings, int &FixedOrder);
bool trackUsesOfI(DenseSet<Value *> &Users, bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I, AliasSetTracker &WriteSet, Instruction *I,
@ -332,10 +334,6 @@ namespace {
DenseMap<Value *, Value *> &ChosenPairs, DenseMap<Value *, Value *> &ChosenPairs,
std::multimap<Value *, Value *> &LoadMoveSet); std::multimap<Value *, Value *> &LoadMoveSet);
void collectPtrInfo(std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<Value *> &LowPtrInsts);
bool canMoveUsesOfIAfterJ(BasicBlock &BB, bool canMoveUsesOfIAfterJ(BasicBlock &BB,
std::multimap<Value *, Value *> &LoadMoveSet, std::multimap<Value *, Value *> &LoadMoveSet,
Instruction *I, Instruction *J); Instruction *I, Instruction *J);
@ -648,12 +646,15 @@ namespace {
std::vector<Value *> AllPairableInsts; std::vector<Value *> AllPairableInsts;
DenseMap<Value *, Value *> AllChosenPairs; DenseMap<Value *, Value *> AllChosenPairs;
DenseSet<ValuePair> AllFixedOrderPairs;
do { do {
std::vector<Value *> PairableInsts; std::vector<Value *> PairableInsts;
std::multimap<Value *, Value *> CandidatePairs; std::multimap<Value *, Value *> CandidatePairs;
DenseSet<ValuePair> FixedOrderPairs;
DenseMap<ValuePair, int> CandidatePairCostSavings; DenseMap<ValuePair, int> CandidatePairCostSavings;
ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
FixedOrderPairs,
CandidatePairCostSavings, CandidatePairCostSavings,
PairableInsts, NonPow2Len); PairableInsts, NonPow2Len);
if (PairableInsts.empty()) continue; if (PairableInsts.empty()) continue;
@ -690,6 +691,14 @@ namespace {
AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
PairableInsts.end()); PairableInsts.end());
AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(),
IE = ChosenPairs.end(); I != IE; ++I) {
if (FixedOrderPairs.count(*I))
AllFixedOrderPairs.insert(*I);
else if (FixedOrderPairs.count(ValuePair(I->second, I->first)))
AllFixedOrderPairs.insert(ValuePair(I->second, I->first));
}
} while (ShouldContinue); } while (ShouldContinue);
if (AllChosenPairs.empty()) return false; if (AllChosenPairs.empty()) return false;
@ -702,7 +711,7 @@ namespace {
// replaced with a vector_extract on the result. Subsequent optimization // replaced with a vector_extract on the result. Subsequent optimization
// passes should coalesce the build/extract combinations. // passes should coalesce the build/extract combinations.
fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs); fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs);
// It is important to cleanup here so that future iterations of this // It is important to cleanup here so that future iterations of this
// function have less work to do. // function have less work to do.
@ -816,11 +825,12 @@ namespace {
// in the use tree of I. // in the use tree of I.
bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
bool IsSimpleLoadStore, bool NonPow2Len, bool IsSimpleLoadStore, bool NonPow2Len,
int &CostSavings) { int &CostSavings, int &FixedOrder) {
DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
" <-> " << *J << "\n"); " <-> " << *J << "\n");
CostSavings = 0; CostSavings = 0;
FixedOrder = 0;
// Loads and stores can be merged if they have different alignments, // Loads and stores can be merged if they have different alignments,
// but are otherwise the same. // but are otherwise the same.
@ -846,6 +856,7 @@ namespace {
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
IAddressSpace, JAddressSpace, IAddressSpace, JAddressSpace,
OffsetInElmts) && abs64(OffsetInElmts) == 1) { OffsetInElmts) && abs64(OffsetInElmts) == 1) {
FixedOrder = (int) OffsetInElmts;
unsigned BottomAlignment = IAlignment; unsigned BottomAlignment = IAlignment;
if (OffsetInElmts < 0) BottomAlignment = JAlignment; if (OffsetInElmts < 0) BottomAlignment = JAlignment;
@ -992,6 +1003,7 @@ namespace {
bool BBVectorize::getCandidatePairs(BasicBlock &BB, bool BBVectorize::getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start, BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs, std::multimap<Value *, Value *> &CandidatePairs,
DenseSet<ValuePair> &FixedOrderPairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings, DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len) { std::vector<Value *> &PairableInsts, bool NonPow2Len) {
BasicBlock::iterator E = BB.end(); BasicBlock::iterator E = BB.end();
@ -1029,9 +1041,9 @@ namespace {
// J does not use I, and comes before the first use of I, so it can be // J does not use I, and comes before the first use of I, so it can be
// merged with I if the instructions are compatible. // merged with I if the instructions are compatible.
int CostSavings; int CostSavings, FixedOrder;
if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
CostSavings)) continue; CostSavings, FixedOrder)) continue;
// J is a candidate for merging with I. // J is a candidate for merging with I.
if (!PairableInsts.size() || if (!PairableInsts.size() ||
@ -1044,6 +1056,11 @@ namespace {
CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
CostSavings)); CostSavings));
if (FixedOrder == 1)
FixedOrderPairs.insert(ValuePair(I, J));
else if (FixedOrder == -1)
FixedOrderPairs.insert(ValuePair(J, I));
// The next call to this function must start after the last instruction // The next call to this function must start after the last instruction
// selected during this invocation. // selected during this invocation.
if (JAfterStart) { if (JAfterStart) {
@ -2341,37 +2358,6 @@ namespace {
} }
} }
// As with the aliasing information, SCEV can also change because of
// vectorization. This information is used to compute relative pointer
// offsets; the necessary information will be cached here prior to
// fusion.
void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<Value *> &LowPtrInsts) {
for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
PIE = PairableInsts.end(); PI != PIE; ++PI) {
DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
if (P == ChosenPairs.end()) continue;
Instruction *I = cast<Instruction>(P->first);
Instruction *J = cast<Instruction>(P->second);
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
continue;
Value *IPtr, *JPtr;
unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts;
if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
IAddressSpace, JAddressSpace,
OffsetInElmts) || abs64(OffsetInElmts) != 1)
llvm_unreachable("Pre-fusion pointer analysis failed");
Value *LowPI = (OffsetInElmts > 0) ? I : J;
LowPtrInsts.insert(LowPI);
}
}
// When the first instruction in each pair is cloned, it will inherit its // When the first instruction in each pair is cloned, it will inherit its
// parent's metadata. This metadata must be combined with that of the other // parent's metadata. This metadata must be combined with that of the other
// instruction in a safe way. // instruction in a safe way.
@ -2405,7 +2391,8 @@ namespace {
// second member). // second member).
void BBVectorize::fuseChosenPairs(BasicBlock &BB, void BBVectorize::fuseChosenPairs(BasicBlock &BB,
std::vector<Value *> &PairableInsts, std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs) { DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<ValuePair> &FixedOrderPairs) {
LLVMContext& Context = BB.getContext(); LLVMContext& Context = BB.getContext();
// During the vectorization process, the order of the pairs to be fused // During the vectorization process, the order of the pairs to be fused
@ -2423,9 +2410,6 @@ namespace {
std::multimap<Value *, Value *> LoadMoveSet; std::multimap<Value *, Value *> LoadMoveSet;
collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
DenseSet<Value *> LowPtrInsts;
collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts);
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
@ -2465,12 +2449,11 @@ namespace {
continue; continue;
} }
bool FlipMemInputs = false; // If the pair must have the other order, then flip it.
if (isa<LoadInst>(I) || isa<StoreInst>(I)) bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I));
FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end());
Instruction *L = I, *H = J; Instruction *L = I, *H = J;
if (FlipMemInputs) if (FlipPairOrder)
std::swap(H, L); std::swap(H, L);
unsigned NumOperands = I->getNumOperands(); unsigned NumOperands = I->getNumOperands();
@ -2492,10 +2475,10 @@ namespace {
// If we've flipped the memory inputs, make sure that we take the correct // If we've flipped the memory inputs, make sure that we take the correct
// alignment. // alignment.
if (FlipMemInputs) { if (FlipPairOrder) {
if (isa<StoreInst>(K)) if (isa<StoreInst>(K))
cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment()); cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment());
else else if (isa<LoadInst>(K))
cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment()); cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment());
} }