mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 06:32:24 +00:00
Teach GlobalLoadUsesSimpleEnoughForHeapSRA and the SROA rewriter how to handle
a limited form of PHI nodes. This finally fixes PR1639, speeding 179.art up from 7.84s to 3.13s on PPC.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@41933 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a637a8b1e7
commit
309f20fc45
@ -867,7 +867,8 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
|
|||||||
|
|
||||||
/// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
|
/// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
|
||||||
/// GV are simple enough to perform HeapSRA, return true.
|
/// GV are simple enough to perform HeapSRA, return true.
|
||||||
static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) {
|
static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
|
||||||
|
MallocInst *MI) {
|
||||||
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
|
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
|
||||||
++UI)
|
++UI)
|
||||||
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
|
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
|
||||||
@ -883,15 +884,35 @@ static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// getelementptr is also ok, but only a simple form.
|
// getelementptr is also ok, but only a simple form.
|
||||||
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI);
|
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) {
|
||||||
if (!GEPI) return false;
|
// Must index into the array and into the struct.
|
||||||
|
if (GEPI->getNumOperands() < 3)
|
||||||
|
return false;
|
||||||
|
|
||||||
// Must index into the array and into the struct.
|
// Otherwise the GEP is ok.
|
||||||
if (GEPI->getNumOperands() < 3)
|
continue;
|
||||||
return false;
|
}
|
||||||
|
|
||||||
// Otherwise the GEP is ok.
|
if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
|
||||||
continue;
|
// We have a phi of a load from the global. We can only handle this
|
||||||
|
// if the other PHI'd values are actually the same. In this case,
|
||||||
|
// the rewriter will just drop the phi entirely.
|
||||||
|
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
|
||||||
|
Value *IV = PN->getIncomingValue(i);
|
||||||
|
if (IV == LI) continue; // Trivially the same.
|
||||||
|
|
||||||
|
// If the phi'd value is from the malloc that initializes the value,
|
||||||
|
// we can xform it.
|
||||||
|
if (IV == MI) continue;
|
||||||
|
|
||||||
|
// Otherwise, we don't know what it is.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise we don't know what this is, not ok.
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -899,7 +920,7 @@ static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) {
|
|||||||
|
|
||||||
/// GetHeapSROALoad - Return the load for the specified field of the HeapSROA'd
|
/// GetHeapSROALoad - Return the load for the specified field of the HeapSROA'd
|
||||||
/// value, lazily creating it on demand.
|
/// value, lazily creating it on demand.
|
||||||
static Value *GetHeapSROALoad(LoadInst *Load, unsigned FieldNo,
|
static Value *GetHeapSROALoad(Instruction *Load, unsigned FieldNo,
|
||||||
const std::vector<GlobalVariable*> &FieldGlobals,
|
const std::vector<GlobalVariable*> &FieldGlobals,
|
||||||
std::vector<Value *> &InsertedLoadsForPtr) {
|
std::vector<Value *> &InsertedLoadsForPtr) {
|
||||||
if (InsertedLoadsForPtr.size() <= FieldNo)
|
if (InsertedLoadsForPtr.size() <= FieldNo)
|
||||||
@ -958,12 +979,39 @@ static void RewriteHeapSROALoadUser(LoadInst *Load, Instruction *LoadUser,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle PHI nodes. All PHI nodes must be merging in the same values, so
|
// Handle PHI nodes. PHI nodes must be merging in the same values, plus
|
||||||
// just treat them like a copy.
|
// potentially the original malloc. Insert phi nodes for each field, then
|
||||||
|
// process uses of the PHI.
|
||||||
PHINode *PN = cast<PHINode>(LoadUser);
|
PHINode *PN = cast<PHINode>(LoadUser);
|
||||||
|
std::vector<Value *> PHIsForField;
|
||||||
|
PHIsForField.resize(FieldGlobals.size());
|
||||||
|
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
|
||||||
|
Value *LoadV = GetHeapSROALoad(Load, i, FieldGlobals, InsertedLoadsForPtr);
|
||||||
|
|
||||||
|
PHINode *FieldPN = new PHINode(LoadV->getType(),
|
||||||
|
PN->getName()+"."+utostr(i), PN);
|
||||||
|
// Fill in the predecessor values.
|
||||||
|
for (unsigned pred = 0, e = PN->getNumIncomingValues(); pred != e; ++pred) {
|
||||||
|
// Each predecessor either uses the load or the original malloc.
|
||||||
|
Value *InVal = PN->getIncomingValue(pred);
|
||||||
|
BasicBlock *BB = PN->getIncomingBlock(pred);
|
||||||
|
Value *NewVal;
|
||||||
|
if (isa<MallocInst>(InVal)) {
|
||||||
|
// Insert a reload from the global in the predecessor.
|
||||||
|
NewVal = GetHeapSROALoad(BB->getTerminator(), i, FieldGlobals,
|
||||||
|
PHIsForField);
|
||||||
|
} else {
|
||||||
|
NewVal = InsertedLoadsForPtr[i];
|
||||||
|
}
|
||||||
|
FieldPN->addIncoming(NewVal, BB);
|
||||||
|
}
|
||||||
|
PHIsForField[i] = FieldPN;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since PHIsForField specifies a phi for every input value, the lazy inserter
|
||||||
|
// will never insert a load.
|
||||||
while (!PN->use_empty())
|
while (!PN->use_empty())
|
||||||
RewriteHeapSROALoadUser(Load, PN->use_back(),
|
RewriteHeapSROALoadUser(Load, PN->use_back(), FieldGlobals, PHIsForField);
|
||||||
FieldGlobals, InsertedLoadsForPtr);
|
|
||||||
PN->eraseFromParent();
|
PN->eraseFromParent();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1193,7 +1241,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
|
|||||||
// If the structure has an unreasonable number of fields, leave it
|
// If the structure has an unreasonable number of fields, leave it
|
||||||
// alone.
|
// alone.
|
||||||
if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 &&
|
if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 &&
|
||||||
GlobalLoadUsesSimpleEnoughForHeapSRA(GV)) {
|
GlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {
|
||||||
GVI = PerformHeapAllocSRoA(GV, MI);
|
GVI = PerformHeapAllocSRoA(GV, MI);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user