mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
Precompute SCEV pointer analysis prior to instruction fusion in BBVectorize.
When both a load/store and its address computation are being vectorized, it can happen that the address-computation vectorization destroys SCEV's ability to analyze the relative pointer offsets. As a result (like with the aliasing analysis info), we need to precompute the necessary information prior to instruction fusing. This was found during stress testing (running through the test suite with a very low required chain length); unfortunately, I don't have a small test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159332 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a9ad9f64d5
commit
282969ed36
@ -277,7 +277,7 @@ namespace {
|
|||||||
bool UseCycleCheck);
|
bool UseCycleCheck);
|
||||||
|
|
||||||
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
|
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
|
||||||
Instruction *J, unsigned o, bool &FlipMemInputs);
|
Instruction *J, unsigned o, bool FlipMemInputs);
|
||||||
|
|
||||||
void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
|
void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
|
||||||
unsigned MaskOffset, unsigned NumInElem,
|
unsigned MaskOffset, unsigned NumInElem,
|
||||||
@ -297,12 +297,12 @@ namespace {
|
|||||||
|
|
||||||
void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
|
void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
|
||||||
Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
|
Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
|
||||||
bool &FlipMemInputs);
|
bool FlipMemInputs);
|
||||||
|
|
||||||
void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
|
void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
|
||||||
Instruction *J, Instruction *K,
|
Instruction *J, Instruction *K,
|
||||||
Instruction *&InsertionPt, Instruction *&K1,
|
Instruction *&InsertionPt, Instruction *&K1,
|
||||||
Instruction *&K2, bool &FlipMemInputs);
|
Instruction *&K2, bool FlipMemInputs);
|
||||||
|
|
||||||
void collectPairLoadMoveSet(BasicBlock &BB,
|
void collectPairLoadMoveSet(BasicBlock &BB,
|
||||||
DenseMap<Value *, Value *> &ChosenPairs,
|
DenseMap<Value *, Value *> &ChosenPairs,
|
||||||
@ -314,6 +314,10 @@ namespace {
|
|||||||
DenseMap<Value *, Value *> &ChosenPairs,
|
DenseMap<Value *, Value *> &ChosenPairs,
|
||||||
std::multimap<Value *, Value *> &LoadMoveSet);
|
std::multimap<Value *, Value *> &LoadMoveSet);
|
||||||
|
|
||||||
|
void collectPtrInfo(std::vector<Value *> &PairableInsts,
|
||||||
|
DenseMap<Value *, Value *> &ChosenPairs,
|
||||||
|
DenseSet<Value *> &LowPtrInsts);
|
||||||
|
|
||||||
bool canMoveUsesOfIAfterJ(BasicBlock &BB,
|
bool canMoveUsesOfIAfterJ(BasicBlock &BB,
|
||||||
std::multimap<Value *, Value *> &LoadMoveSet,
|
std::multimap<Value *, Value *> &LoadMoveSet,
|
||||||
Instruction *I, Instruction *J);
|
Instruction *I, Instruction *J);
|
||||||
@ -1487,19 +1491,21 @@ namespace {
|
|||||||
// instruction that fuses I with J.
|
// instruction that fuses I with J.
|
||||||
Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
|
Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
|
||||||
Instruction *I, Instruction *J, unsigned o,
|
Instruction *I, Instruction *J, unsigned o,
|
||||||
bool &FlipMemInputs) {
|
bool FlipMemInputs) {
|
||||||
Value *IPtr, *JPtr;
|
Value *IPtr, *JPtr;
|
||||||
unsigned IAlignment, JAlignment;
|
unsigned IAlignment, JAlignment;
|
||||||
int64_t OffsetInElmts;
|
int64_t OffsetInElmts;
|
||||||
|
|
||||||
|
// Note: the analysis might fail here, that is why FlipMemInputs has
|
||||||
|
// been precomputed (OffsetInElmts must be unused here).
|
||||||
(void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
|
(void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
|
||||||
OffsetInElmts);
|
OffsetInElmts);
|
||||||
|
|
||||||
// The pointer value is taken to be the one with the lowest offset.
|
// The pointer value is taken to be the one with the lowest offset.
|
||||||
Value *VPtr;
|
Value *VPtr;
|
||||||
if (OffsetInElmts > 0) {
|
if (!FlipMemInputs) {
|
||||||
VPtr = IPtr;
|
VPtr = IPtr;
|
||||||
} else {
|
} else {
|
||||||
FlipMemInputs = true;
|
|
||||||
VPtr = JPtr;
|
VPtr = JPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1970,8 +1976,7 @@ namespace {
|
|||||||
void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
|
void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
|
||||||
Instruction *I, Instruction *J,
|
Instruction *I, Instruction *J,
|
||||||
SmallVector<Value *, 3> &ReplacedOperands,
|
SmallVector<Value *, 3> &ReplacedOperands,
|
||||||
bool &FlipMemInputs) {
|
bool FlipMemInputs) {
|
||||||
FlipMemInputs = false;
|
|
||||||
unsigned NumOperands = I->getNumOperands();
|
unsigned NumOperands = I->getNumOperands();
|
||||||
|
|
||||||
for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
|
for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
|
||||||
@ -2022,7 +2027,7 @@ namespace {
|
|||||||
Instruction *J, Instruction *K,
|
Instruction *J, Instruction *K,
|
||||||
Instruction *&InsertionPt,
|
Instruction *&InsertionPt,
|
||||||
Instruction *&K1, Instruction *&K2,
|
Instruction *&K1, Instruction *&K2,
|
||||||
bool &FlipMemInputs) {
|
bool FlipMemInputs) {
|
||||||
if (isa<StoreInst>(I)) {
|
if (isa<StoreInst>(I)) {
|
||||||
AA->replaceWithNewValue(I, K);
|
AA->replaceWithNewValue(I, K);
|
||||||
AA->replaceWithNewValue(J, K);
|
AA->replaceWithNewValue(J, K);
|
||||||
@ -2176,6 +2181,36 @@ namespace {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// As with the aliasing information, SCEV can also change because of
|
||||||
|
// vectorization. This information is used to compute relative pointer
|
||||||
|
// offsets; the necessary information will be cached here prior to
|
||||||
|
// fusion.
|
||||||
|
void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts,
|
||||||
|
DenseMap<Value *, Value *> &ChosenPairs,
|
||||||
|
DenseSet<Value *> &LowPtrInsts) {
|
||||||
|
for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
|
||||||
|
PIE = PairableInsts.end(); PI != PIE; ++PI) {
|
||||||
|
DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
|
||||||
|
if (P == ChosenPairs.end()) continue;
|
||||||
|
|
||||||
|
Instruction *I = cast<Instruction>(P->first);
|
||||||
|
Instruction *J = cast<Instruction>(P->second);
|
||||||
|
|
||||||
|
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Value *IPtr, *JPtr;
|
||||||
|
unsigned IAlignment, JAlignment;
|
||||||
|
int64_t OffsetInElmts;
|
||||||
|
if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
|
||||||
|
OffsetInElmts) || abs64(OffsetInElmts) != 1)
|
||||||
|
llvm_unreachable("Pre-fusion pointer analysis failed");
|
||||||
|
|
||||||
|
Value *LowPI = (OffsetInElmts > 0) ? I : J;
|
||||||
|
LowPtrInsts.insert(LowPI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// When the first instruction in each pair is cloned, it will inherit its
|
// When the first instruction in each pair is cloned, it will inherit its
|
||||||
// parent's metadata. This metadata must be combined with that of the other
|
// parent's metadata. This metadata must be combined with that of the other
|
||||||
// instruction in a safe way.
|
// instruction in a safe way.
|
||||||
@ -2227,6 +2262,9 @@ namespace {
|
|||||||
std::multimap<Value *, Value *> LoadMoveSet;
|
std::multimap<Value *, Value *> LoadMoveSet;
|
||||||
collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
|
collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
|
||||||
|
|
||||||
|
DenseSet<Value *> LowPtrInsts;
|
||||||
|
collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts);
|
||||||
|
|
||||||
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
|
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
|
||||||
|
|
||||||
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
|
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
|
||||||
@ -2266,7 +2304,10 @@ namespace {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FlipMemInputs;
|
bool FlipMemInputs = false;
|
||||||
|
if (isa<LoadInst>(I) || isa<StoreInst>(I))
|
||||||
|
FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end());
|
||||||
|
|
||||||
unsigned NumOperands = I->getNumOperands();
|
unsigned NumOperands = I->getNumOperands();
|
||||||
SmallVector<Value *, 3> ReplacedOperands(NumOperands);
|
SmallVector<Value *, 3> ReplacedOperands(NumOperands);
|
||||||
getReplacementInputsForPair(Context, I, J, ReplacedOperands,
|
getReplacementInputsForPair(Context, I, J, ReplacedOperands,
|
||||||
|
Loading…
Reference in New Issue
Block a user