mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	Precompute SCEV pointer analysis prior to instruction fusion in BBVectorize.
When both a load/store and its address computation are being vectorized, it can happen that the address-computation vectorization destroys SCEV's ability to analyze the relative pointer offsets. As a result (like with the aliasing analysis info), we need to precompute the necessary information prior to instruction fusing. This was found during stress testing (running through the test suite with a very low required chain length); unfortunately, I don't have a small test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159332 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -277,7 +277,7 @@ namespace { | |||||||
|                       bool UseCycleCheck); |                       bool UseCycleCheck); | ||||||
|  |  | ||||||
|     Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, |     Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, | ||||||
|                      Instruction *J, unsigned o, bool &FlipMemInputs); |                      Instruction *J, unsigned o, bool FlipMemInputs); | ||||||
|  |  | ||||||
|     void fillNewShuffleMask(LLVMContext& Context, Instruction *J, |     void fillNewShuffleMask(LLVMContext& Context, Instruction *J, | ||||||
|                      unsigned MaskOffset, unsigned NumInElem, |                      unsigned MaskOffset, unsigned NumInElem, | ||||||
| @@ -297,12 +297,12 @@ namespace { | |||||||
|  |  | ||||||
|     void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, |     void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, | ||||||
|                      Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, |                      Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, | ||||||
|                      bool &FlipMemInputs); |                      bool FlipMemInputs); | ||||||
|  |  | ||||||
|     void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, |     void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, | ||||||
|                      Instruction *J, Instruction *K, |                      Instruction *J, Instruction *K, | ||||||
|                      Instruction *&InsertionPt, Instruction *&K1, |                      Instruction *&InsertionPt, Instruction *&K1, | ||||||
|                      Instruction *&K2, bool &FlipMemInputs); |                      Instruction *&K2, bool FlipMemInputs); | ||||||
|  |  | ||||||
|     void collectPairLoadMoveSet(BasicBlock &BB, |     void collectPairLoadMoveSet(BasicBlock &BB, | ||||||
|                      DenseMap<Value *, Value *> &ChosenPairs, |                      DenseMap<Value *, Value *> &ChosenPairs, | ||||||
| @@ -314,6 +314,10 @@ namespace { | |||||||
|                      DenseMap<Value *, Value *> &ChosenPairs, |                      DenseMap<Value *, Value *> &ChosenPairs, | ||||||
|                      std::multimap<Value *, Value *> &LoadMoveSet); |                      std::multimap<Value *, Value *> &LoadMoveSet); | ||||||
|  |  | ||||||
|  |     void collectPtrInfo(std::vector<Value *> &PairableInsts, | ||||||
|  |                         DenseMap<Value *, Value *> &ChosenPairs, | ||||||
|  |                         DenseSet<Value *> &LowPtrInsts); | ||||||
|  |  | ||||||
|     bool canMoveUsesOfIAfterJ(BasicBlock &BB, |     bool canMoveUsesOfIAfterJ(BasicBlock &BB, | ||||||
|                      std::multimap<Value *, Value *> &LoadMoveSet, |                      std::multimap<Value *, Value *> &LoadMoveSet, | ||||||
|                      Instruction *I, Instruction *J); |                      Instruction *I, Instruction *J); | ||||||
| @@ -1487,19 +1491,21 @@ namespace { | |||||||
|   // instruction that fuses I with J. |   // instruction that fuses I with J. | ||||||
|   Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, |   Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, | ||||||
|                      Instruction *I, Instruction *J, unsigned o, |                      Instruction *I, Instruction *J, unsigned o, | ||||||
|                      bool &FlipMemInputs) { |                      bool FlipMemInputs) { | ||||||
|     Value *IPtr, *JPtr; |     Value *IPtr, *JPtr; | ||||||
|     unsigned IAlignment, JAlignment; |     unsigned IAlignment, JAlignment; | ||||||
|     int64_t OffsetInElmts; |     int64_t OffsetInElmts; | ||||||
|  |  | ||||||
|  |     // Note: the analysis might fail here, that is why FlipMemInputs has | ||||||
|  |     // been precomputed (OffsetInElmts must be unused here). | ||||||
|     (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, |     (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, | ||||||
|                           OffsetInElmts); |                           OffsetInElmts); | ||||||
|  |  | ||||||
|     // The pointer value is taken to be the one with the lowest offset. |     // The pointer value is taken to be the one with the lowest offset. | ||||||
|     Value *VPtr; |     Value *VPtr; | ||||||
|     if (OffsetInElmts > 0) { |     if (!FlipMemInputs) { | ||||||
|       VPtr = IPtr; |       VPtr = IPtr; | ||||||
|     } else { |     } else { | ||||||
|       FlipMemInputs = true; |  | ||||||
|       VPtr = JPtr; |       VPtr = JPtr; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -1970,8 +1976,7 @@ namespace { | |||||||
|   void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, |   void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, | ||||||
|                      Instruction *I, Instruction *J, |                      Instruction *I, Instruction *J, | ||||||
|                      SmallVector<Value *, 3> &ReplacedOperands, |                      SmallVector<Value *, 3> &ReplacedOperands, | ||||||
|                      bool &FlipMemInputs) { |                      bool FlipMemInputs) { | ||||||
|     FlipMemInputs = false; |  | ||||||
|     unsigned NumOperands = I->getNumOperands(); |     unsigned NumOperands = I->getNumOperands(); | ||||||
|  |  | ||||||
|     for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { |     for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { | ||||||
| @@ -2022,7 +2027,7 @@ namespace { | |||||||
|                      Instruction *J, Instruction *K, |                      Instruction *J, Instruction *K, | ||||||
|                      Instruction *&InsertionPt, |                      Instruction *&InsertionPt, | ||||||
|                      Instruction *&K1, Instruction *&K2, |                      Instruction *&K1, Instruction *&K2, | ||||||
|                      bool &FlipMemInputs) { |                      bool FlipMemInputs) { | ||||||
|     if (isa<StoreInst>(I)) { |     if (isa<StoreInst>(I)) { | ||||||
|       AA->replaceWithNewValue(I, K); |       AA->replaceWithNewValue(I, K); | ||||||
|       AA->replaceWithNewValue(J, K); |       AA->replaceWithNewValue(J, K); | ||||||
| @@ -2176,6 +2181,36 @@ namespace { | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   // As with the aliasing information, SCEV can also change because of | ||||||
|  |   // vectorization. This information is used to compute relative pointer | ||||||
|  |   // offsets; the necessary information will be cached here prior to | ||||||
|  |   // fusion. | ||||||
|  |   void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts, | ||||||
|  |                                    DenseMap<Value *, Value *> &ChosenPairs, | ||||||
|  |                                    DenseSet<Value *> &LowPtrInsts) { | ||||||
|  |     for (std::vector<Value *>::iterator PI = PairableInsts.begin(), | ||||||
|  |       PIE = PairableInsts.end(); PI != PIE; ++PI) { | ||||||
|  |       DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); | ||||||
|  |       if (P == ChosenPairs.end()) continue; | ||||||
|  |  | ||||||
|  |       Instruction *I = cast<Instruction>(P->first); | ||||||
|  |       Instruction *J = cast<Instruction>(P->second); | ||||||
|  |  | ||||||
|  |       if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) | ||||||
|  |         continue; | ||||||
|  |  | ||||||
|  |       Value *IPtr, *JPtr; | ||||||
|  |       unsigned IAlignment, JAlignment; | ||||||
|  |       int64_t OffsetInElmts; | ||||||
|  |       if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, | ||||||
|  |                           OffsetInElmts) || abs64(OffsetInElmts) != 1) | ||||||
|  |         llvm_unreachable("Pre-fusion pointer analysis failed"); | ||||||
|  |  | ||||||
|  |       Value *LowPI = (OffsetInElmts > 0) ? I : J; | ||||||
|  |       LowPtrInsts.insert(LowPI); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|   // When the first instruction in each pair is cloned, it will inherit its |   // When the first instruction in each pair is cloned, it will inherit its | ||||||
|   // parent's metadata. This metadata must be combined with that of the other |   // parent's metadata. This metadata must be combined with that of the other | ||||||
|   // instruction in a safe way. |   // instruction in a safe way. | ||||||
| @@ -2227,6 +2262,9 @@ namespace { | |||||||
|     std::multimap<Value *, Value *> LoadMoveSet; |     std::multimap<Value *, Value *> LoadMoveSet; | ||||||
|     collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); |     collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); | ||||||
|  |  | ||||||
|  |     DenseSet<Value *> LowPtrInsts; | ||||||
|  |     collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts); | ||||||
|  |  | ||||||
|     DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); |     DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); | ||||||
|  |  | ||||||
|     for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { |     for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { | ||||||
| @@ -2266,7 +2304,10 @@ namespace { | |||||||
|         continue; |         continue; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       bool FlipMemInputs; |       bool FlipMemInputs = false; | ||||||
|  |       if (isa<LoadInst>(I) || isa<StoreInst>(I)) | ||||||
|  |         FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end()); | ||||||
|  |  | ||||||
|       unsigned NumOperands = I->getNumOperands(); |       unsigned NumOperands = I->getNumOperands(); | ||||||
|       SmallVector<Value *, 3> ReplacedOperands(NumOperands); |       SmallVector<Value *, 3> ReplacedOperands(NumOperands); | ||||||
|       getReplacementInputsForPair(Context, I, J, ReplacedOperands, |       getReplacementInputsForPair(Context, I, J, ReplacedOperands, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user