Precompute SCEV pointer analysis prior to instruction fusion in BBVectorize.

When both a load/store and its address computation are being vectorized, it can
happen that the address-computation vectorization destroys SCEV's ability
to analyze the relative pointer offsets. As a result (as with the aliasing
analysis info), we need to precompute the necessary information prior to
instruction fusing.

This was found during stress testing (running through the test suite with a very
low required chain length); unfortunately, I don't have a small test case.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159332 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2012-06-28 05:42:45 +00:00
parent a9ad9f64d5
commit 282969ed36

View File

@ -277,7 +277,7 @@ namespace {
bool UseCycleCheck); bool UseCycleCheck);
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
Instruction *J, unsigned o, bool &FlipMemInputs); Instruction *J, unsigned o, bool FlipMemInputs);
void fillNewShuffleMask(LLVMContext& Context, Instruction *J, void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
unsigned MaskOffset, unsigned NumInElem, unsigned MaskOffset, unsigned NumInElem,
@ -297,12 +297,12 @@ namespace {
void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
bool &FlipMemInputs); bool FlipMemInputs);
void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
Instruction *J, Instruction *K, Instruction *J, Instruction *K,
Instruction *&InsertionPt, Instruction *&K1, Instruction *&InsertionPt, Instruction *&K1,
Instruction *&K2, bool &FlipMemInputs); Instruction *&K2, bool FlipMemInputs);
void collectPairLoadMoveSet(BasicBlock &BB, void collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs, DenseMap<Value *, Value *> &ChosenPairs,
@ -314,6 +314,10 @@ namespace {
DenseMap<Value *, Value *> &ChosenPairs, DenseMap<Value *, Value *> &ChosenPairs,
std::multimap<Value *, Value *> &LoadMoveSet); std::multimap<Value *, Value *> &LoadMoveSet);
void collectPtrInfo(std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<Value *> &LowPtrInsts);
bool canMoveUsesOfIAfterJ(BasicBlock &BB, bool canMoveUsesOfIAfterJ(BasicBlock &BB,
std::multimap<Value *, Value *> &LoadMoveSet, std::multimap<Value *, Value *> &LoadMoveSet,
Instruction *I, Instruction *J); Instruction *I, Instruction *J);
@ -1487,19 +1491,21 @@ namespace {
// instruction that fuses I with J. // instruction that fuses I with J.
Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
Instruction *I, Instruction *J, unsigned o, Instruction *I, Instruction *J, unsigned o,
bool &FlipMemInputs) { bool FlipMemInputs) {
Value *IPtr, *JPtr; Value *IPtr, *JPtr;
unsigned IAlignment, JAlignment; unsigned IAlignment, JAlignment;
int64_t OffsetInElmts; int64_t OffsetInElmts;
// Note: the analysis might fail here, that is why FlipMemInputs has
// been precomputed (OffsetInElmts must be unused here).
(void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
OffsetInElmts); OffsetInElmts);
// The pointer value is taken to be the one with the lowest offset. // The pointer value is taken to be the one with the lowest offset.
Value *VPtr; Value *VPtr;
if (OffsetInElmts > 0) { if (!FlipMemInputs) {
VPtr = IPtr; VPtr = IPtr;
} else { } else {
FlipMemInputs = true;
VPtr = JPtr; VPtr = JPtr;
} }
@ -1970,8 +1976,7 @@ namespace {
void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
Instruction *I, Instruction *J, Instruction *I, Instruction *J,
SmallVector<Value *, 3> &ReplacedOperands, SmallVector<Value *, 3> &ReplacedOperands,
bool &FlipMemInputs) { bool FlipMemInputs) {
FlipMemInputs = false;
unsigned NumOperands = I->getNumOperands(); unsigned NumOperands = I->getNumOperands();
for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
@ -2022,7 +2027,7 @@ namespace {
Instruction *J, Instruction *K, Instruction *J, Instruction *K,
Instruction *&InsertionPt, Instruction *&InsertionPt,
Instruction *&K1, Instruction *&K2, Instruction *&K1, Instruction *&K2,
bool &FlipMemInputs) { bool FlipMemInputs) {
if (isa<StoreInst>(I)) { if (isa<StoreInst>(I)) {
AA->replaceWithNewValue(I, K); AA->replaceWithNewValue(I, K);
AA->replaceWithNewValue(J, K); AA->replaceWithNewValue(J, K);
@ -2176,6 +2181,36 @@ namespace {
} }
} }
// SCEV results, much like the aliasing information, may change once
// instructions start being fused. Relative pointer offsets are derived from
// that analysis, so for every chosen load/store pair this routine records,
// ahead of fusion, which member of the pair is the "low pointer" instruction.
//
// PairableInsts - candidate instructions; each is looked up in ChosenPairs.
// ChosenPairs   - maps the first instruction of a selected pair to the second.
// LowPtrInsts   - output set; receives one instruction per load/store pair
//                 (the first when the computed offset is positive, otherwise
//                 the second).
void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts,
                                 DenseMap<Value *, Value *> &ChosenPairs,
                                 DenseSet<Value *> &LowPtrInsts) {
  typedef std::vector<Value *>::iterator InstIter;
  typedef DenseMap<Value *, Value *>::iterator PairIter;

  for (InstIter It = PairableInsts.begin(), End = PairableInsts.end();
       It != End; ++It) {
    // Skip instructions that were not selected as the first half of a pair.
    PairIter Chosen = ChosenPairs.find(*It);
    if (Chosen == ChosenPairs.end())
      continue;

    Instruction *First = cast<Instruction>(Chosen->first);
    Instruction *Second = cast<Instruction>(Chosen->second);

    // Only loads and stores have a pointer ordering worth caching.
    if (!(isa<LoadInst>(First) || isa<StoreInst>(First)))
      continue;

    Value *FirstPtr, *SecondPtr;
    unsigned FirstAlign, SecondAlign;
    int64_t Offset;
    const bool Analyzed = getPairPtrInfo(First, Second, FirstPtr, SecondPtr,
                                         FirstAlign, SecondAlign, Offset);

    // A chosen memory pair is expected to be analyzable here and to sit
    // exactly one element apart; anything else is treated as unreachable.
    if (!Analyzed || abs64(Offset) != 1)
      llvm_unreachable("Pre-fusion pointer analysis failed");

    if (Offset > 0)
      LowPtrInsts.insert(First);
    else
      LowPtrInsts.insert(Second);
  }
}
// When the first instruction in each pair is cloned, it will inherit its // When the first instruction in each pair is cloned, it will inherit its
// parent's metadata. This metadata must be combined with that of the other // parent's metadata. This metadata must be combined with that of the other
// instruction in a safe way. // instruction in a safe way.
@ -2227,6 +2262,9 @@ namespace {
std::multimap<Value *, Value *> LoadMoveSet; std::multimap<Value *, Value *> LoadMoveSet;
collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
DenseSet<Value *> LowPtrInsts;
collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts);
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
@ -2266,7 +2304,10 @@ namespace {
continue; continue;
} }
bool FlipMemInputs; bool FlipMemInputs = false;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end());
unsigned NumOperands = I->getNumOperands(); unsigned NumOperands = I->getNumOperands();
SmallVector<Value *, 3> ReplacedOperands(NumOperands); SmallVector<Value *, 3> ReplacedOperands(NumOperands);
getReplacementInputsForPair(Context, I, J, ReplacedOperands, getReplacementInputsForPair(Context, I, J, ReplacedOperands,