From 282969ed3641ffa426e0440d3824dd219152b2d8 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 28 Jun 2012 05:42:45 +0000 Subject: [PATCH] Precompute SCEV pointer analysis prior to instruction fusion in BBVectorize. When both a load/store and its address computation are being vectorized, it can happen that the address-computation vectorization destroys SCEV's ability to analyze the relative pointer offsets. As a result (like with the aliasing analysis info), we need to precompute the necessary information prior to instruction fusing. This was found during stress testing (running through the test suite with a very low required chain length); unfortunately, I don't have a small test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159332 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 61 ++++++++++++++++++++---- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index af14ee304b9..62d23cb948f 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -277,7 +277,7 @@ namespace { bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool &FlipMemInputs); + Instruction *J, unsigned o, bool FlipMemInputs); void fillNewShuffleMask(LLVMContext& Context, Instruction *J, unsigned MaskOffset, unsigned NumInElem, @@ -297,12 +297,12 @@ namespace { void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector &ReplacedOperands, - bool &FlipMemInputs); + bool FlipMemInputs); void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, - Instruction *&K2, bool &FlipMemInputs); + Instruction *&K2, bool FlipMemInputs); void collectPairLoadMoveSet(BasicBlock &BB, DenseMap &ChosenPairs, @@ -314,6 +314,10 @@ 
namespace { DenseMap &ChosenPairs, std::multimap &LoadMoveSet); + void collectPtrInfo(std::vector &PairableInsts, + DenseMap &ChosenPairs, + DenseSet &LowPtrInsts); + bool canMoveUsesOfIAfterJ(BasicBlock &BB, std::multimap &LoadMoveSet, Instruction *I, Instruction *J); @@ -1487,19 +1491,21 @@ namespace { // instruction that fuses I with J. Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o, - bool &FlipMemInputs) { + bool FlipMemInputs) { Value *IPtr, *JPtr; unsigned IAlignment, JAlignment; int64_t OffsetInElmts; + + // Note: the analysis might fail here, that is why FlipMemInputs has + // been precomputed (OffsetInElmts must be unused here). (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, OffsetInElmts); // The pointer value is taken to be the one with the lowest offset. Value *VPtr; - if (OffsetInElmts > 0) { + if (!FlipMemInputs) { VPtr = IPtr; } else { - FlipMemInputs = true; VPtr = JPtr; } @@ -1970,8 +1976,7 @@ namespace { void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector &ReplacedOperands, - bool &FlipMemInputs) { - FlipMemInputs = false; + bool FlipMemInputs) { unsigned NumOperands = I->getNumOperands(); for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { @@ -2022,7 +2027,7 @@ namespace { Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, Instruction *&K2, - bool &FlipMemInputs) { + bool FlipMemInputs) { if (isa(I)) { AA->replaceWithNewValue(I, K); AA->replaceWithNewValue(J, K); @@ -2176,6 +2181,36 @@ namespace { } } + // As with the aliasing information, SCEV can also change because of + // vectorization. This information is used to compute relative pointer + // offsets; the necessary information will be cached here prior to + // fusion. 
+ void BBVectorize::collectPtrInfo(std::vector &PairableInsts, + DenseMap &ChosenPairs, + DenseSet &LowPtrInsts) { + for (std::vector::iterator PI = PairableInsts.begin(), + PIE = PairableInsts.end(); PI != PIE; ++PI) { + DenseMap::iterator P = ChosenPairs.find(*PI); + if (P == ChosenPairs.end()) continue; + + Instruction *I = cast(P->first); + Instruction *J = cast(P->second); + + if (!isa(I) && !isa(I)) + continue; + + Value *IPtr, *JPtr; + unsigned IAlignment, JAlignment; + int64_t OffsetInElmts; + if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, + OffsetInElmts) || abs64(OffsetInElmts) != 1) + llvm_unreachable("Pre-fusion pointer analysis failed"); + + Value *LowPI = (OffsetInElmts > 0) ? I : J; + LowPtrInsts.insert(LowPI); + } + } + // When the first instruction in each pair is cloned, it will inherit its // parent's metadata. This metadata must be combined with that of the other // instruction in a safe way. @@ -2227,6 +2262,9 @@ namespace { std::multimap LoadMoveSet; collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); + DenseSet LowPtrInsts; + collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts); + DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { @@ -2266,7 +2304,10 @@ namespace { continue; } - bool FlipMemInputs; + bool FlipMemInputs = false; + if (isa(I) || isa(I)) + FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end()); + unsigned NumOperands = I->getNumOperands(); SmallVector ReplacedOperands(NumOperands); getReplacementInputsForPair(Context, I, J, ReplacedOperands,