Clean whitespaces.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160668 91177308-0d34-0410-b5e6-96231b3b80d8
parent 8899d5c6fb
commit a94d6e87c4

Whitespace-only cleanup; the hunks touched, grouped by pass:

ADCE (Aggressive Dead Code Elimination):
@@ -9,7 +9,7 @@
@@ -36,13 +36,13 @@ namespace {
@@ -52,7 +52,7 @@ INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
@@ -62,7 +62,7 @@ bool ADCE::runOnFunction(Function& F) {
@@ -72,7 +72,7 @@ bool ADCE::runOnFunction(Function& F) {
@@ -82,7 +82,7 @@ bool ADCE::runOnFunction(Function& F) {
CodeGenPrepare:
@@ -83,7 +83,7 @@ namespace {
@@ -157,7 +157,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
@@ -336,7 +336,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
@@ -547,7 +547,7 @@ protected:
@@ -564,19 +564,19 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
@@ -604,7 +604,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
@@ -760,13 +760,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
@@ -778,20 +778,20 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
@@ -826,15 +826,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
@@ -943,7 +943,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
@@ -955,7 +955,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
@@ -967,12 +967,12 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
@@ -1187,7 +1187,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
@@ -1207,23 +1207,23 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
@@ -1237,7 +1237,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
@@ -1265,7 +1265,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
DSE (dead store elimination):
@@ -248,7 +248,7 @@ static bool isShortenable(Instruction *I) {
@@ -292,7 +292,7 @@ namespace {
@@ -315,7 +315,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
@@ -381,7 +381,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
@@ -520,11 +520,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -533,7 +533,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -551,16 +551,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
EarlyCSE (Early CSE):
@@ -39,7 +39,7 @@ static unsigned getHash(const void *V) {
@@ -47,16 +47,16 @@ namespace {
@@ -90,7 +90,7 @@ template<> struct DenseMapInfo<SimpleValue> {
@@ -126,13 +126,13 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
@@ -140,21 +140,21 @@ namespace {
@@ -168,7 +168,7 @@ namespace llvm {
@@ -189,7 +189,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
@@ -203,11 +203,11 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
@@ -223,14 +223,14 @@ public:
@@ -243,15 +243,15 @@ public:
@@ -326,7 +326,7 @@ private:
@@ -350,7 +350,7 @@ INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
@@ -359,20 +359,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -381,7 +381,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -392,7 +392,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -404,12 +404,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -417,7 +417,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -431,18 +431,18 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -457,19 +457,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -483,7 +483,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -491,7 +491,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -509,7 +509,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
@@ -517,7 +517,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
@ -173,7 +173,7 @@ Expression ValueTable::create_expression(Instruction *I) {
|
||||
if (e.varargs[0] > e.varargs[1])
|
||||
std::swap(e.varargs[0], e.varargs[1]);
|
||||
}
|
||||
|
||||
|
||||
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
|
||||
// Sort the operand value numbers so x<y and y>x get the same value number.
|
||||
CmpInst::Predicate Predicate = C->getPredicate();
|
||||
@ -187,7 +187,7 @@ Expression ValueTable::create_expression(Instruction *I) {
|
||||
II != IE; ++II)
|
||||
e.varargs.push_back(*II);
|
||||
}
|
||||
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
@ -391,7 +391,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
|
||||
valueNumbering[V] = nextValueNumber;
|
||||
return nextValueNumber++;
|
||||
}
|
||||
|
||||
|
||||
Instruction* I = cast<Instruction>(V);
|
||||
Expression exp;
|
||||
switch (I->getOpcode()) {
|
||||
@ -507,7 +507,7 @@ namespace {
|
||||
const TargetLibraryInfo *TLI;
|
||||
|
||||
ValueTable VN;
|
||||
|
||||
|
||||
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
|
||||
/// have that value number. Use findLeader to query it.
|
||||
struct LeaderTableEntry {
|
||||
@ -517,7 +517,7 @@ namespace {
|
||||
};
|
||||
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
|
||||
BumpPtrAllocator TableAllocator;
|
||||
|
||||
|
||||
SmallVector<Instruction*, 8> InstrsToErase;
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
@ -527,14 +527,14 @@ namespace {
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F);
|
||||
|
||||
|
||||
/// markInstructionForDeletion - This removes the specified instruction from
|
||||
/// our various maps and marks it for deletion.
|
||||
void markInstructionForDeletion(Instruction *I) {
|
||||
VN.erase(I);
|
||||
InstrsToErase.push_back(I);
|
||||
}
|
||||
|
||||
|
||||
const TargetData *getTargetData() const { return TD; }
|
||||
DominatorTree &getDominatorTree() const { return *DT; }
|
||||
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
|
||||
@ -549,14 +549,14 @@ namespace {
|
||||
Curr.BB = BB;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
|
||||
Node->Val = V;
|
||||
Node->BB = BB;
|
||||
Node->Next = Curr.Next;
|
||||
Curr.Next = Node;
|
||||
}
|
||||
|
||||
|
||||
/// removeFromLeaderTable - Scan the list of values corresponding to a given
|
||||
/// value number, and remove the given instruction if encountered.
|
||||
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
|
||||
@ -567,7 +567,7 @@ namespace {
|
||||
Prev = Curr;
|
||||
Curr = Curr->Next;
|
||||
}
|
||||
|
||||
|
||||
if (Prev) {
|
||||
Prev->Next = Curr->Next;
|
||||
} else {
|
||||
@ -597,7 +597,7 @@ namespace {
|
||||
AU.addPreserved<DominatorTree>();
|
||||
AU.addPreserved<AliasAnalysis>();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Helper fuctions
|
||||
// FIXME: eliminate or document these better
|
||||
@ -735,15 +735,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
|
||||
StoredVal->getType()->isStructTy() ||
|
||||
StoredVal->getType()->isArrayTy())
|
||||
return false;
|
||||
|
||||
|
||||
// The store has to be at least as big as the load.
|
||||
if (TD.getTypeSizeInBits(StoredVal->getType()) <
|
||||
TD.getTypeSizeInBits(LoadTy))
|
||||
return false;
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
|
||||
/// then a load from a must-aliased pointer of a different type, try to coerce
|
||||
@ -751,80 +751,80 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
|
||||
/// InsertPt is the place to insert new instructions.
|
||||
///
|
||||
/// If we can't do it, return null.
|
||||
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
|
||||
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
|
||||
Type *LoadedTy,
|
||||
Instruction *InsertPt,
|
||||
const TargetData &TD) {
|
||||
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
|
||||
return 0;
|
||||
|
||||
|
||||
// If this is already the right type, just return it.
|
||||
Type *StoredValTy = StoredVal->getType();
|
||||
|
||||
|
||||
uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
|
||||
uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
|
||||
|
||||
|
||||
// If the store and reload are the same size, we can always reuse it.
|
||||
if (StoreSize == LoadSize) {
|
||||
// Pointer to Pointer -> use bitcast.
|
||||
if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
|
||||
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
|
||||
|
||||
|
||||
// Convert source pointers to integers, which can be bitcast.
|
||||
if (StoredValTy->isPointerTy()) {
|
||||
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
|
||||
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
|
||||
}
|
||||
|
||||
|
||||
Type *TypeToCastTo = LoadedTy;
|
||||
if (TypeToCastTo->isPointerTy())
|
||||
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
|
||||
|
||||
|
||||
if (StoredValTy != TypeToCastTo)
|
||||
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
|
||||
|
||||
|
||||
// Cast to pointer if the load needs a pointer type.
|
||||
if (LoadedTy->isPointerTy())
|
||||
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
|
||||
|
||||
|
||||
return StoredVal;
|
||||
}
|
||||
|
||||
|
||||
// If the loaded value is smaller than the available value, then we can
|
||||
// extract out a piece from it. If the available value is too small, then we
|
||||
// can't do anything.
|
||||
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
|
||||
|
||||
|
||||
// Convert source pointers to integers, which can be manipulated.
|
||||
if (StoredValTy->isPointerTy()) {
|
||||
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
|
||||
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
|
||||
}
|
||||
|
||||
|
||||
// Convert vectors and fp to integer, which can be manipulated.
|
||||
if (!StoredValTy->isIntegerTy()) {
|
||||
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
|
||||
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
|
||||
}
|
||||
|
||||
|
||||
// If this is a big-endian system, we need to shift the value down to the low
|
||||
// bits so that a truncate will work.
|
||||
if (TD.isBigEndian()) {
|
||||
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
|
||||
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
|
||||
}
|
||||
|
||||
|
||||
// Truncate the integer to the right size now.
|
||||
Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
|
||||
StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
|
||||
|
||||
|
||||
if (LoadedTy == NewIntTy)
|
||||
return StoredVal;
|
||||
|
||||
|
||||
// If the result is a pointer, inttoptr.
|
||||
if (LoadedTy->isPointerTy())
|
||||
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
|
||||
|
||||
|
||||
// Otherwise, bitcast.
|
||||
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
|
||||
}
|
||||
@ -845,13 +845,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
|
||||
// to transform them. We need to be able to bitcast to integer.
|
||||
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
|
||||
return -1;
|
||||
|
||||
|
||||
int64_t StoreOffset = 0, LoadOffset = 0;
|
||||
Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
|
||||
Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
|
||||
if (StoreBase != LoadBase)
|
||||
return -1;
|
||||
|
||||
|
||||
// If the load and store are to the exact same address, they should have been
|
||||
// a must alias. AA must have gotten confused.
|
||||
// FIXME: Study to see if/when this happens. One case is forwarding a memset
|
||||
@ -866,18 +866,18 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// If the load and store don't overlap at all, the store doesn't provide
|
||||
// anything to the load. In this case, they really don't alias at all, AA
|
||||
// must have gotten confused.
|
||||
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
|
||||
|
||||
|
||||
if ((WriteSizeInBits & 7) | (LoadSize & 7))
|
||||
return -1;
|
||||
uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
|
||||
LoadSize >>= 3;
|
||||
|
||||
|
||||
|
||||
|
||||
bool isAAFailure = false;
|
||||
if (StoreOffset < LoadOffset)
|
||||
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
|
||||
@ -895,7 +895,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
// If the Load isn't completely contained within the stored bits, we don't
|
||||
// have all the bits to feed it. We could do something crazy in the future
|
||||
// (issue a smaller load then merge the bits in) but this seems unlikely to be
|
||||
@ -903,11 +903,11 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
|
||||
if (StoreOffset > LoadOffset ||
|
||||
StoreOffset+StoreSize < LoadOffset+LoadSize)
|
||||
return -1;
|
||||
|
||||
|
||||
// Okay, we can do this transformation. Return the number of bytes into the
|
||||
// store that the load is.
|
||||
return LoadOffset-StoreOffset;
|
||||
}
|
||||
}
|
||||
|
||||
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
|
||||
/// memdep query of a load that ends up being a clobbering store.
|
||||
@ -933,23 +933,23 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
|
||||
// Cannot handle reading from store of first-class aggregate yet.
|
||||
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
|
||||
return -1;
|
||||
|
||||
|
||||
Value *DepPtr = DepLI->getPointerOperand();
|
||||
uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
|
||||
int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
|
||||
if (R != -1) return R;
|
||||
|
||||
|
||||
// If we have a load/load clobber an DepLI can be widened to cover this load,
|
||||
// then we should widen it!
|
||||
int64_t LoadOffs = 0;
|
||||
const Value *LoadBase =
|
||||
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
|
||||
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
|
||||
|
||||
|
||||
unsigned Size = MemoryDependenceAnalysis::
|
||||
getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
|
||||
if (Size == 0) return -1;
|
||||
|
||||
|
||||
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
|
||||
}
|
||||
|
||||
@ -968,29 +968,29 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
|
||||
if (MI->getIntrinsicID() == Intrinsic::memset)
|
||||
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
|
||||
MemSizeInBits, TD);
|
||||
|
||||
|
||||
// If we have a memcpy/memmove, the only case we can handle is if this is a
|
||||
// copy from constant memory. In that case, we can read directly from the
|
||||
// constant memory.
|
||||
MemTransferInst *MTI = cast<MemTransferInst>(MI);
|
||||
|
||||
|
||||
Constant *Src = dyn_cast<Constant>(MTI->getSource());
|
||||
if (Src == 0) return -1;
|
||||
|
||||
|
||||
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
|
||||
if (GV == 0 || !GV->isConstant()) return -1;
|
||||
|
||||
|
||||
// See if the access is within the bounds of the transfer.
|
||||
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
|
||||
MI->getDest(), MemSizeInBits, TD);
|
||||
if (Offset == -1)
|
||||
return Offset;
|
||||
|
||||
|
||||
// Otherwise, see if we can constant fold a load from the constant with the
|
||||
// offset applied as appropriate.
|
||||
Src = ConstantExpr::getBitCast(Src,
|
||||
llvm::Type::getInt8PtrTy(Src->getContext()));
|
||||
Constant *OffsetCst =
|
||||
Constant *OffsetCst =
|
||||
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
|
||||
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
|
||||
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
|
||||
@ -998,7 +998,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
|
||||
return Offset;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// GetStoreValueForLoad - This function is called when we have a
|
||||
/// memdep query of a load that ends up being a clobbering store. This means
|
||||
@ -1009,32 +1009,32 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
|
||||
Type *LoadTy,
|
||||
Instruction *InsertPt, const TargetData &TD){
|
||||
LLVMContext &Ctx = SrcVal->getType()->getContext();
|
||||
|
||||
|
||||
uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
|
||||
uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
|
||||
|
||||
|
||||
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
|
||||
|
||||
|
||||
// Compute which bits of the stored value are being used by the load. Convert
|
||||
// to an integer type to start with.
|
||||
if (SrcVal->getType()->isPointerTy())
|
||||
SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
|
||||
if (!SrcVal->getType()->isIntegerTy())
|
||||
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
|
||||
|
||||
|
||||
// Shift the bits to the least significant depending on endianness.
|
||||
unsigned ShiftAmt;
|
||||
if (TD.isLittleEndian())
|
||||
ShiftAmt = Offset*8;
|
||||
else
|
||||
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
|
||||
|
||||
|
||||
if (ShiftAmt)
|
||||
SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
|
||||
|
||||
|
||||
if (LoadSize != StoreSize)
|
||||
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
|
||||
|
||||
|
||||
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
|
||||
}
|
||||
|
||||
@ -1061,14 +1061,14 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
|
||||
NewLoadSize = NextPowerOf2(NewLoadSize);
|
||||
|
||||
Value *PtrVal = SrcVal->getPointerOperand();
|
||||
|
||||
|
||||
// Insert the new load after the old load. This ensures that subsequent
|
||||
// memdep queries will find the new load. We can't easily remove the old
|
||||
// load completely because it is already in the value numbering table.
|
||||
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
|
||||
Type *DestPTy =
|
||||
Type *DestPTy =
|
||||
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
|
||||
DestPTy = PointerType::get(DestPTy,
|
||||
DestPTy = PointerType::get(DestPTy,
|
||||
cast<PointerType>(PtrVal->getType())->getAddressSpace());
|
||||
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
|
||||
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
|
||||
@ -1078,7 +1078,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
|
||||
|
||||
DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
|
||||
DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
|
||||
|
||||
|
||||
// Replace uses of the original load with the wider load. On a big endian
|
||||
// system, we need to shift down to get the relevant bits.
|
||||
Value *RV = NewLoad;
|
||||
@ -1087,7 +1087,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
|
||||
NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
|
||||
RV = Builder.CreateTrunc(RV, SrcVal->getType());
|
||||
SrcVal->replaceAllUsesWith(RV);
|
||||
|
||||
|
||||
// We would like to use gvn.markInstructionForDeletion here, but we can't
|
||||
// because the load is already memoized into the leader map table that GVN
|
||||
// tracks. It is potentially possible to remove the load from the table,
|
||||
@ -1096,7 +1096,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
|
||||
gvn.getMemDep().removeInstruction(SrcVal);
|
||||
SrcVal = NewLoad;
|
||||
}
|
||||
|
||||
|
||||
return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
|
||||
}
|
||||
|
||||
@ -1110,7 +1110,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
|
||||
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
|
||||
|
||||
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
|
||||
|
||||
|
||||
// We know that this method is only called when the mem transfer fully
|
||||
// provides the bits for the load.
|
||||
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
|
||||
@ -1119,9 +1119,9 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
|
||||
Value *Val = MSI->getValue();
|
||||
if (LoadSize != 1)
|
||||
Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
|
||||
|
||||
|
||||
Value *OneElt = Val;
|
||||
|
||||
|
||||
// Splat the value out to the right number of bits.
|
||||
for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
|
||||
// If we can double the number of bytes set, do it.
|
||||
@ -1131,16 +1131,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
|
||||
NumBytesSet <<= 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Otherwise insert one byte at a time.
|
||||
Value *ShVal = Builder.CreateShl(Val, 1*8);
|
||||
Val = Builder.CreateOr(OneElt, ShVal);
|
||||
++NumBytesSet;
|
||||
}
|
||||
|
||||
|
||||
return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
|
||||
}
|
||||
|
||||
|
||||
// Otherwise, this is a memcpy/memmove from a constant global.
|
||||
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
|
||||
Constant *Src = cast<Constant>(MTI->getSource());
|
||||
@ -1149,7 +1149,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
|
||||
// offset applied as appropriate.
|
||||
Src = ConstantExpr::getBitCast(Src,
|
||||
llvm::Type::getInt8PtrTy(Src->getContext()));
|
||||
Constant *OffsetCst =
|
||||
Constant *OffsetCst =
|
||||
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
|
||||
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
|
||||
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
|
||||
@ -1166,13 +1166,13 @@ struct AvailableValueInBlock {
|
||||
LoadVal, // A value produced by a load.
|
||||
MemIntrin // A memory intrinsic which is loaded from.
|
||||
};
|
||||
|
||||
|
||||
/// V - The value that is live out of the block.
|
||||
PointerIntPair<Value *, 2, ValType> Val;
|
||||
|
||||
|
||||
/// Offset - The byte offset in Val that is interesting for the load query.
|
||||
unsigned Offset;
|
||||
|
||||
|
||||
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
|
||||
unsigned Offset = 0) {
|
||||
AvailableValueInBlock Res;
|
||||
@ -1192,7 +1192,7 @@ struct AvailableValueInBlock {
|
||||
Res.Offset = Offset;
|
||||
return Res;
|
||||
}
|
||||
|
||||
|
||||
static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
|
||||
unsigned Offset = 0) {
|
||||
AvailableValueInBlock Res;
|
||||
@ -1211,17 +1211,17 @@ struct AvailableValueInBlock {
|
||||
assert(isSimpleValue() && "Wrong accessor");
|
||||
return Val.getPointer();
|
||||
}
|
||||
|
||||
|
||||
LoadInst *getCoercedLoadValue() const {
|
||||
assert(isCoercedLoadValue() && "Wrong accessor");
|
||||
return cast<LoadInst>(Val.getPointer());
|
||||
}
|
||||
|
||||
|
||||
MemIntrinsic *getMemIntrinValue() const {
|
||||
assert(isMemIntrinValue() && "Wrong accessor");
|
||||
return cast<MemIntrinsic>(Val.getPointer());
|
||||
}
|
||||
|
||||
|
||||
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
|
||||
/// defined here to the specified type. This handles various coercion cases.
|
||||
Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
|
||||
@ -1233,7 +1233,7 @@ struct AvailableValueInBlock {
|
||||
assert(TD && "Need target data to handle type mismatch case");
|
||||
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
|
||||
*TD);
|
||||
|
||||
|
||||
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
|
||||
<< *getSimpleValue() << '\n'
|
||||
<< *Res << '\n' << "\n\n\n");
|
||||
@ -1245,7 +1245,7 @@ struct AvailableValueInBlock {
|
||||
} else {
|
||||
Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
|
||||
gvn);
|
||||
|
||||
|
||||
DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
|
||||
<< *getCoercedLoadValue() << '\n'
|
||||
<< *Res << '\n' << "\n\n\n");
|
||||
@ -1268,12 +1268,12 @@ struct AvailableValueInBlock {
|
||||
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
|
||||
/// construct SSA form, allowing us to eliminate LI. This returns the value
|
||||
/// that should be used at LI's definition site.
|
||||
static Value *ConstructSSAForLoadSet(LoadInst *LI,
|
||||
static Value *ConstructSSAForLoadSet(LoadInst *LI,
|
||||
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
|
||||
GVN &gvn) {
|
||||
// Check for the fully redundant, dominating load case. In this case, we can
|
||||
// just use the dominating value directly.
|
||||
if (ValuesPerBlock.size() == 1 &&
|
||||
if (ValuesPerBlock.size() == 1 &&
|
||||
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
|
||||
LI->getParent()))
|
||||
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
|
||||
@ -1282,29 +1282,29 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
|
||||
SmallVector<PHINode*, 8> NewPHIs;
|
||||
SSAUpdater SSAUpdate(&NewPHIs);
|
||||
SSAUpdate.Initialize(LI->getType(), LI->getName());
|
||||
|
||||
|
||||
Type *LoadTy = LI->getType();
|
||||
|
||||
|
||||
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
|
||||
const AvailableValueInBlock &AV = ValuesPerBlock[i];
|
||||
BasicBlock *BB = AV.BB;
|
||||
|
||||
|
||||
if (SSAUpdate.HasValueForBlock(BB))
|
||||
continue;
|
||||
|
||||
SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
|
||||
}
|
||||
|
||||
|
||||
// Perform PHI construction.
|
||||
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
|
||||
|
||||
|
||||
// If new PHI nodes were created, notify alias analysis.
|
||||
if (V->getType()->isPointerTy()) {
|
||||
AliasAnalysis *AA = gvn.getAliasAnalysis();
|
||||
|
||||
|
||||
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
|
||||
AA->copyValue(LI, NewPHIs[i]);
|
||||
|
||||
|
||||
// Now that we've copied information to the new PHIs, scan through
|
||||
// them again and inform alias analysis that we've added potentially
|
||||
// escaping uses to any values that are operands to these PHIs.
|
||||
@ -1376,7 +1376,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
// the pointer operand of the load if PHI translation occurs. Make sure
|
||||
// to consider the right address.
|
||||
Value *Address = Deps[i].getAddress();
|
||||
|
||||
|
||||
// If the dependence is to a store that writes to a superset of the bits
|
||||
// read by the load, we can extract the bits we need for the load from the
|
||||
// stored value.
|
||||
@ -1392,7 +1392,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Check to see if we have something like this:
|
||||
// load i32* P
|
||||
// load i8* (P+1)
|
||||
@ -1404,7 +1404,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
|
||||
LI->getPointerOperand(),
|
||||
DepLI, *TD);
|
||||
|
||||
|
||||
if (Offset != -1) {
|
||||
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
|
||||
Offset));
|
||||
@ -1423,10 +1423,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
|
||||
Offset));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
UnavailableBlocks.push_back(DepBB);
|
||||
continue;
|
||||
}
|
||||
@ -1443,7 +1443,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
UndefValue::get(LI->getType())));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
|
||||
// Reject loads and stores that are to the same address but are of
|
||||
// different types if we have to.
|
||||
@ -1461,7 +1461,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
S->getValueOperand()));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
|
||||
// If the types mismatch and we can't handle it, reject reuse of the load.
|
||||
if (LD->getType() != LI->getType()) {
|
||||
@ -1470,12 +1470,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
|
||||
UnavailableBlocks.push_back(DepBB);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
UnavailableBlocks.push_back(DepBB);
|
||||
continue;
|
||||
}
|
||||
@ -1489,7 +1489,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
// its value. Insert PHIs and remove the fully redundant value now.
|
||||
if (UnavailableBlocks.empty()) {
|
||||
DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
|
||||
|
||||
|
||||
// Perform PHI construction.
|
||||
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
|
||||
LI->replaceAllUsesWith(V);
|
||||
@ -1532,10 +1532,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
return false;
|
||||
if (Blockers.count(TmpBB))
|
||||
return false;
|
||||
|
||||
|
||||
// If any of these blocks has more than one successor (i.e. if the edge we
|
||||
// just traversed was critical), then there are other paths through this
|
||||
// block along which the load may not be anticipated. Hoisting the load
|
||||
// just traversed was critical), then there are other paths through this
|
||||
// block along which the load may not be anticipated. Hoisting the load
|
||||
// above this block would be adding the load to execution paths along
|
||||
// which it was not previously executed.
|
||||
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
|
||||
@ -1613,7 +1613,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
unsigned NumUnavailablePreds = PredLoads.size();
|
||||
assert(NumUnavailablePreds != 0 &&
|
||||
"Fully available value should be eliminated above!");
|
||||
|
||||
|
||||
// If this load is unavailable in multiple predecessors, reject it.
|
||||
// FIXME: If we could restructure the CFG, we could make a common pred with
|
||||
// all the preds that don't have an available LI and insert a new load into
|
||||
@ -1690,10 +1690,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
|
||||
DEBUG(if (!NewInsts.empty())
|
||||
dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
|
||||
<< *NewInsts.back() << '\n');
|
||||
|
||||
|
||||
// Assign value numbers to the new instructions.
|
||||
for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
|
||||
// FIXME: We really _ought_ to insert these value numbers into their
|
||||
// FIXME: We really _ought_ to insert these value numbers into their
|
||||
// parent's availability map. However, in doing so, we risk getting into
|
||||
// ordering issues. If a block hasn't been processed yet, we would be
|
||||
// marking a value as AVAIL-IN, which isn't what we intend.
|
||||
@ -1795,7 +1795,7 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
markInstructionForDeletion(L);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// ... to a pointer that has been loaded from before...
|
||||
MemDepResult Dep = MD->getDependency(L);
|
||||
|
||||
@ -1821,7 +1821,7 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
|
||||
L->getType(), L, *TD);
|
||||
}
|
||||
|
||||
|
||||
// Check to see if we have something like this:
|
||||
// load i32* P
|
||||
// load i8* (P+1)
|
||||
@ -1831,14 +1831,14 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
// we have the first instruction in the entry block.
|
||||
if (DepLI == L)
|
||||
return false;
|
||||
|
||||
|
||||
int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
|
||||
L->getPointerOperand(),
|
||||
DepLI, *TD);
|
||||
if (Offset != -1)
|
||||
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
|
||||
}
|
||||
|
||||
|
||||
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
|
||||
// a value on from it.
|
||||
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
|
||||
@ -1848,11 +1848,11 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
if (Offset != -1)
|
||||
AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
|
||||
}
|
||||
|
||||
|
||||
if (AvailVal) {
|
||||
DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
|
||||
<< *AvailVal << '\n' << *L << "\n\n\n");
|
||||
|
||||
|
||||
// Replace the load!
|
||||
L->replaceAllUsesWith(AvailVal);
|
||||
if (AvailVal->getType()->isPointerTy())
|
||||
@ -1862,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If the value isn't available, don't do anything!
|
||||
if (Dep.isClobber()) {
|
||||
DEBUG(
|
||||
@ -1892,7 +1892,7 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
Instruction *DepInst = Dep.getInst();
|
||||
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
|
||||
Value *StoredVal = DepSI->getValueOperand();
|
||||
|
||||
|
||||
// The store and load are to a must-aliased pointer, but they may not
|
||||
// actually have the same type. See if we know how to reuse the stored
|
||||
// value (depending on its type).
|
||||
@ -1902,11 +1902,11 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
L, *TD);
|
||||
if (StoredVal == 0)
|
||||
return false;
|
||||
|
||||
|
||||
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
|
||||
<< '\n' << *L << "\n\n\n");
|
||||
}
|
||||
else
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1921,7 +1921,7 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
|
||||
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
|
||||
Value *AvailableVal = DepLI;
|
||||
|
||||
|
||||
// The loads are of a must-aliased pointer, but they may not actually have
|
||||
// the same type. See if we know how to reuse the previously loaded value
|
||||
// (depending on its type).
|
||||
@ -1931,14 +1931,14 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
L, *TD);
|
||||
if (AvailableVal == 0)
|
||||
return false;
|
||||
|
||||
|
||||
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
|
||||
<< "\n" << *L << "\n\n\n");
|
||||
}
|
||||
else
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Remove it!
|
||||
patchAndReplaceAllUsesWith(AvailableVal, L);
|
||||
if (DepLI->getType()->isPointerTy())
|
||||
@ -1957,7 +1957,7 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
++NumGVNLoad;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// If this load occurs either right after a lifetime begin,
|
||||
// then the loaded value is undefined.
|
||||
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
|
||||
@ -1972,28 +1972,28 @@ bool GVN::processLoad(LoadInst *L) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// findLeader - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
|
||||
LeaderTableEntry Vals = LeaderTable[num];
|
||||
if (!Vals.Val) return 0;
|
||||
|
||||
|
||||
Value *Val = 0;
|
||||
if (DT->dominates(Vals.BB, BB)) {
|
||||
Val = Vals.Val;
|
||||
if (isa<Constant>(Val)) return Val;
|
||||
}
|
||||
|
||||
|
||||
LeaderTableEntry* Next = Vals.Next;
|
||||
while (Next) {
|
||||
if (DT->dominates(Next->BB, BB)) {
|
||||
if (isa<Constant>(Next->Val)) return Next->Val;
|
||||
if (!Val) Val = Next->Val;
|
||||
}
|
||||
|
||||
|
||||
Next = Next->Next;
|
||||
}
|
||||
|
||||
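// A minimal sketch of the leader scan just described, with hypothetical
// stand-ins for LeaderTableEntry and the dominator-tree query (the real code
// works on llvm::Value* and llvm::BasicBlock*): walk the chain, prefer a
// dominating constant, otherwise keep the first dominating value seen.
#include <functional>

struct LeaderEntry {
  int Val;             // stand-in for llvm::Value*
  int BB;              // stand-in for the defining llvm::BasicBlock*
  bool IsConstant;
  LeaderEntry *Next;
};

static const int *findLeaderSketch(
    const LeaderEntry *Head, int QueryBB,
    const std::function<bool(int, int)> &Dominates) {
  const int *Found = nullptr;
  for (const LeaderEntry *E = Head; E; E = E->Next) {
    if (!Dominates(E->BB, QueryBB))
      continue;                 // this definition does not reach QueryBB
    if (E->IsConstant)
      return &E->Val;           // a dominating constant is always the leader
    if (!Found)
      Found = &E->Val;          // otherwise remember the first dominating value
  }
  return Found;
}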
@ -2243,7 +2243,7 @@ bool GVN::processInstruction(Instruction *I) {
|
||||
// Instructions with void type don't return a value, so there's
|
||||
// no point in trying to find redundancies in them.
|
||||
if (I->getType()->isVoidTy()) return false;
|
||||
|
||||
|
||||
uint32_t NextNum = VN.getNextUnusedValueNumber();
|
||||
unsigned Num = VN.lookup_or_add(I);
|
||||
|
||||
@ -2261,7 +2261,7 @@ bool GVN::processInstruction(Instruction *I) {
|
||||
addToLeaderTable(Num, I, I->getParent());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Perform fast-path value-number based elimination of values inherited from
|
||||
// dominators.
|
||||
Value *repl = findLeader(I->getParent(), Num);
|
||||
@ -2270,7 +2270,7 @@ bool GVN::processInstruction(Instruction *I) {
|
||||
addToLeaderTable(Num, I, I->getParent());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Remove it!
|
||||
patchAndReplaceAllUsesWith(repl, I);
|
||||
if (MD && repl->getType()->isPointerTy())
|
||||
@ -2297,7 +2297,7 @@ bool GVN::runOnFunction(Function& F) {
|
||||
// optimization opportunities.
|
||||
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
|
||||
BasicBlock *BB = FI++;
|
||||
|
||||
|
||||
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
|
||||
if (removedBlock) ++NumGVNBlocks;
|
||||
|
||||
@ -2454,7 +2454,7 @@ bool GVN::performPRE(Function &F) {
|
||||
// we would need to insert instructions in more than one pred.
|
||||
if (NumWithout != 1 || NumWith == 0)
|
||||
continue;
|
||||
|
||||
|
||||
// Don't do PRE across indirect branch.
|
||||
if (isa<IndirectBrInst>(PREPred->getTerminator()))
|
||||
continue;
|
||||
@ -2530,7 +2530,7 @@ bool GVN::performPRE(Function &F) {
|
||||
unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
|
||||
VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
|
||||
}
|
||||
|
||||
|
||||
if (MD)
|
||||
MD->invalidateCachedPointerInfo(Phi);
|
||||
}
|
||||
@ -2567,7 +2567,7 @@ bool GVN::splitCriticalEdges() {
|
||||
/// iterateOnFunction - Executes one iteration of GVN
|
||||
bool GVN::iterateOnFunction(Function &F) {
|
||||
cleanupGlobalSets();
|
||||
|
||||
|
||||
// Top-down walk of the dominator tree
|
||||
bool Changed = false;
|
||||
#if 0
|
||||
@ -2602,7 +2602,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
|
||||
I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
|
||||
const LeaderTableEntry *Node = &I->second;
|
||||
assert(Node->Val != Inst && "Inst still in value numbering scope!");
|
||||
|
||||
|
||||
while (Node->Next) {
|
||||
Node = Node->Next;
|
||||
assert(Node->Val != Inst && "Inst still in value numbering scope!");
|
||||
|
@ -12,7 +12,7 @@
|
||||
// global). Such a transformation can significantly reduce the register pressure
|
||||
// when many globals are involved.
|
||||
//
|
||||
// For example, consider the code which touches several global variables at
// once:
|
||||
//
|
||||
// static int foo[N], bar[N], baz[N];
|
||||
@ -208,8 +208,8 @@ bool GlobalMerge::doInitialization(Module &M) {
|
||||
if (BSSGlobals.size() > 1)
|
||||
Changed |= doMerge(BSSGlobals, M, false);
|
||||
|
||||
// FIXME: This currently breaks the EH processing due to the way the
// typeinfo detection works. We might want to detect the TIs and ignore
// them in the future.
|
||||
// if (ConstGlobals.size() > 1)
|
||||
// Changed |= doMerge(ConstGlobals, M, true);
|
||||
|
@ -861,7 +861,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
|
||||
|
||||
// If all of the loads and stores that feed the value have the same TBAA tag,
|
||||
// then we can propagate it onto any newly inserted loads.
|
||||
MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
|
||||
SmallPtrSet<BasicBlock*, 8> PredsScanned;
|
||||
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
|
||||
@ -887,7 +887,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
|
||||
OneUnavailablePred = PredBB;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// If tbaa tags disagree or are not present, forget about them.
|
||||
if (TBAATag != ThisTBAATag) TBAATag = 0;
|
||||
|
||||
@ -951,7 +951,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
|
||||
NewVal->setDebugLoc(LI->getDebugLoc());
|
||||
if (TBAATag)
|
||||
NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
|
||||
|
||||
|
||||
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
|
||||
}
|
||||
|
||||
|
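// A small sketch of the tag-agreement rule used above, with an opaque Tag
// pointer standing in for llvm::MDNode*: the merged TBAA tag survives only if
// every contributing load carries exactly the same tag.
#include <vector>

struct Tag {};  // hypothetical stand-in for llvm::MDNode

static const Tag *commonTBAATag(const std::vector<const Tag *> &LoadTags) {
  const Tag *Common = LoadTags.empty() ? nullptr : LoadTags.front();
  for (const Tag *T : LoadTags)
    if (T == nullptr || T != Common)
      return nullptr;           // disagreement or missing tag: forget it
  return Common;                // safe to put on the newly inserted loads
}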
@ -32,10 +32,10 @@ namespace {
|
||||
LoopDeletion() : LoopPass(ID) {
|
||||
initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
||||
// Possibly eliminate loop L if it is dead.
|
||||
bool runOnLoop(Loop* L, LPPassManager& LPM);
|
||||
|
||||
|
||||
bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
|
||||
SmallVector<BasicBlock*, 4>& exitBlocks,
|
||||
bool &Changed, BasicBlock *Preheader);
|
||||
@ -46,7 +46,7 @@ namespace {
|
||||
AU.addRequired<ScalarEvolution>();
|
||||
AU.addRequiredID(LoopSimplifyID);
|
||||
AU.addRequiredID(LCSSAID);
|
||||
|
||||
|
||||
AU.addPreserved<ScalarEvolution>();
|
||||
AU.addPreserved<DominatorTree>();
|
||||
AU.addPreserved<LoopInfo>();
|
||||
@ -55,7 +55,7 @@ namespace {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
char LoopDeletion::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
|
||||
"Delete dead loops", false, false)
|
||||
@ -79,7 +79,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
|
||||
SmallVector<BasicBlock*, 4>& exitBlocks,
|
||||
bool &Changed, BasicBlock *Preheader) {
|
||||
BasicBlock* exitBlock = exitBlocks[0];
|
||||
|
||||
|
||||
// Make sure that all PHI entries coming from the loop are loop invariant.
|
||||
// Because the code is in LCSSA form, any values used outside of the loop
|
||||
// must pass through a PHI in the exit block, meaning that this check is
|
||||
@ -97,14 +97,14 @@ bool LoopDeletion::IsLoopDead(Loop* L,
|
||||
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if (Instruction* I = dyn_cast<Instruction>(incoming))
|
||||
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
|
||||
return false;
|
||||
|
||||
++BI;
|
||||
}
|
||||
|
||||
|
||||
// Make sure that no instructions in the block have potential side-effects.
|
||||
// This includes instructions that could write to memory, and loads that are
|
||||
// marked volatile. This could be made more aggressive by using aliasing
|
||||
@ -117,23 +117,23 @@ bool LoopDeletion::IsLoopDead(Loop* L,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
/// observable behavior of the program other than finite running time. Note that
/// we do ensure that this never removes a loop that might be infinite, as doing
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
|
||||
BasicBlock* preheader = L->getLoopPreheader();
|
||||
if (!preheader)
|
||||
return false;
|
||||
|
||||
|
||||
// If LoopSimplify form is not available, stay out of trouble.
|
||||
if (!L->hasDedicatedExits())
|
||||
return false;
|
||||
@ -142,36 +142,36 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
|
||||
// they would already have been removed in earlier executions of this pass.
|
||||
if (L->begin() != L->end())
|
||||
return false;
|
||||
|
||||
|
||||
SmallVector<BasicBlock*, 4> exitingBlocks;
|
||||
L->getExitingBlocks(exitingBlocks);
|
||||
|
||||
|
||||
SmallVector<BasicBlock*, 4> exitBlocks;
|
||||
L->getUniqueExitBlocks(exitBlocks);
|
||||
|
||||
|
||||
// We require that the loop only have a single exit block. Otherwise, we'd
|
||||
// be in the situation of needing to be able to solve statically which exit
|
||||
// block will be branched to, or trying to preserve the branching logic in
|
||||
// a loop invariant manner.
|
||||
if (exitBlocks.size() != 1)
|
||||
return false;
|
||||
|
||||
|
||||
// Finally, we have to check that the loop really is dead.
|
||||
bool Changed = false;
|
||||
if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
|
||||
return Changed;
|
||||
|
||||
|
||||
// Don't remove loops for which we can't solve the trip count.
|
||||
// They could be infinite, in which case we'd be changing program behavior.
|
||||
ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
|
||||
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
|
||||
if (isa<SCEVCouldNotCompute>(S))
|
||||
return Changed;
|
||||
|
||||
|
||||
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
BasicBlock* exitBlock = exitBlocks[0];
|
||||
|
||||
|
||||
// Because we're deleting a large chunk of code at once, the sequence in which
|
||||
// we remove things is very important to avoid invalidation issues. Don't
|
||||
// mess with this unless you have good reason and know what you're doing.
|
||||
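// A condensed checklist version of the deletability test walked through
// above. Each flag is assumed to have already been computed by the analyses
// the pass requires (LoopSimplify, LCSSA, ScalarEvolution); this only sketches
// how the individual checks gate one another.
struct LoopFacts {
  bool HasPreheader;
  bool HasDedicatedExits;   // LoopSimplify form
  bool HasSubloops;
  unsigned NumUniqueExits;
  bool ExitPHIsLoopInvariant;
  bool HasSideEffects;
  bool TripCountComputable; // !isa<SCEVCouldNotCompute>(MaxBackedgeTakenCount)
};

static bool canDeleteLoopSketch(const LoopFacts &F) {
  if (!F.HasPreheader || !F.HasDedicatedExits) return false;
  if (F.HasSubloops) return false;          // inner loops are handled first
  if (F.NumUniqueExits != 1) return false;  // must branch to a single exit block
  if (!F.ExitPHIsLoopInvariant || F.HasSideEffects) return false;
  return F.TripCountComputable;             // never delete a possibly-infinite loop
}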
@ -197,7 +197,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
|
||||
P->removeIncomingValue(exitingBlocks[i]);
|
||||
++BI;
|
||||
}
|
||||
|
||||
|
||||
// Update the dominator tree and remove the instructions and blocks that will
|
||||
// be deleted from the reference counting scheme.
|
||||
DominatorTree& DT = getAnalysis<DominatorTree>();
|
||||
@ -211,7 +211,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
|
||||
DE = ChildNodes.end(); DI != DE; ++DI) {
|
||||
DT.changeImmediateDominator(*DI, DT[preheader]);
|
||||
}
|
||||
|
||||
|
||||
ChildNodes.clear();
|
||||
DT.eraseNode(*LI);
|
||||
|
||||
@ -219,7 +219,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
|
||||
// delete it freely later.
|
||||
(*LI)->dropAllReferences();
|
||||
}
|
||||
|
||||
|
||||
// Erase the instructions and the blocks without having to worry
|
||||
// about ordering because we already dropped the references.
|
||||
// NOTE: This iteration is safe because erasing the block does not remove its
|
||||
@ -236,13 +236,13 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
|
||||
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
|
||||
E = blocks.end(); I != E; ++I)
|
||||
loopInfo.removeBlock(*I);
|
||||
|
||||
|
||||
// The last step is to inform the loop pass manager that we've
|
||||
// eliminated this loop.
|
||||
LPM.deleteLoopFromQueue(L);
|
||||
Changed = true;
|
||||
|
||||
|
||||
++NumDeleted;
|
||||
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
@ -173,7 +173,7 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
|
||||
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
CurLoop = L;
|
||||
|
||||
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
|
||||
if (Name == "memset" || Name == "memcpy")
|
||||
return false;
|
||||
|
@ -48,7 +48,7 @@ namespace {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
char LoopInstSimplify::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
|
||||
"Simplify instructions in loops", false, false)
|
||||
|
@ -418,12 +418,13 @@ bool LoopRotate::rotateLoop(Loop *L) {
|
||||
}
|
||||
|
||||
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
|
||||
// thus is not a preheader anymore. Split the edge to form a real preheader.
|
||||
// thus is not a preheader anymore.
|
||||
// Split the edge to form a real preheader.
|
||||
BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
|
||||
NewPH->setName(NewHeader->getName() + ".lr.ph");
|
||||
|
||||
// Preserve canonical loop form, which means that 'Exit' should have only one
|
||||
// predecessor.
|
||||
// Preserve canonical loop form, which means that 'Exit' should have only
|
||||
// one predecessor.
|
||||
BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
|
||||
ExitSplit->moveBefore(Exit);
|
||||
} else {
|
||||
|
@ -25,12 +25,12 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
|
||||
Value *Ptr = CXI->getPointerOperand();
|
||||
Value *Cmp = CXI->getCompareOperand();
|
||||
Value *Val = CXI->getNewValOperand();
|
||||
|
||||
|
||||
LoadInst *Orig = Builder.CreateLoad(Ptr);
|
||||
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
|
||||
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
|
||||
Builder.CreateStore(Res, Ptr);
|
||||
|
||||
|
||||
CXI->replaceAllUsesWith(Orig);
|
||||
CXI->eraseFromParent();
|
||||
return true;
|
||||
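// The lowering above computes, in effect, the plain non-atomic sequence
// below; this is a sketch of the emitted IR's semantics, not LLVM API usage,
// and it presumes the pass is only applied where atomicity is not required.
template <typename T>
static T lowerCmpXchgSketch(T *Ptr, T Cmp, T NewVal) {
  T Orig = *Ptr;                          // LoadInst
  T Res = (Orig == Cmp) ? NewVal : Orig;  // ICmpEQ + Select
  *Ptr = Res;                             // StoreInst
  return Orig;                            // cmpxchg yields the original value
}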
|
@ -44,7 +44,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
|
||||
gep_type_iterator GTI = gep_type_begin(GEP);
|
||||
for (unsigned i = 1; i != Idx; ++i, ++GTI)
|
||||
/*skip along*/;
|
||||
|
||||
|
||||
// Compute the offset implied by the rest of the indices.
|
||||
int64_t Offset = 0;
|
||||
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
|
||||
@ -58,7 +58,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
|
||||
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Otherwise, we have a sequential type like an array or vector. Multiply
|
||||
// the index by the ElementSize.
|
||||
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
|
||||
@ -77,7 +77,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
|
||||
Ptr2 = Ptr2->stripPointerCasts();
|
||||
GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
|
||||
GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
|
||||
|
||||
|
||||
bool VariableIdxFound = false;
|
||||
|
||||
// If one pointer is a GEP and the other isn't, then see if the GEP is a
|
||||
@ -91,7 +91,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
|
||||
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
|
||||
return !VariableIdxFound;
|
||||
}
|
||||
|
||||
|
||||
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
|
||||
// base. After that base, they may have some number of common (and
|
||||
// potentially variable) indices. After that they handle some constant
|
||||
@ -99,7 +99,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
|
||||
// handle no other case.
|
||||
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
|
||||
return false;
|
||||
|
||||
|
||||
// Skip any common indices and track the GEP types.
|
||||
unsigned Idx = 1;
|
||||
for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
|
||||
@ -109,7 +109,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
|
||||
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
|
||||
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
|
||||
if (VariableIdxFound) return false;
|
||||
|
||||
|
||||
Offset = Offset2-Offset1;
|
||||
return true;
|
||||
}
|
||||
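// A rough illustration of the Ptr2 - Ptr1 computation above, flattened to
// plain (index, element-size-in-bytes) pairs instead of real GEP operands.
// The two vectors are assumed to share the same base pointer and to contain
// only constant indices, mirroring the VariableIdxFound bail-out.
#include <cstdint>
#include <utility>
#include <vector>

static int64_t flatByteOffset(
    const std::vector<std::pair<int64_t, int64_t>> &Indices) {
  int64_t Off = 0;
  for (const auto &P : Indices)
    Off += P.first * P.second;   // index * element size
  return Off;
}

// Offset such that Ptr2 == Ptr1 + Offset, defined when both are constant.
static int64_t pointerOffsetSketch(
    const std::vector<std::pair<int64_t, int64_t>> &GEP1,
    const std::vector<std::pair<int64_t, int64_t>> &GEP2) {
  return flatByteOffset(GEP2) - flatByteOffset(GEP1);
}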
@ -128,19 +128,19 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
|
||||
namespace {
|
||||
struct MemsetRange {
|
||||
// Start/End - A semi-open range that describes the span this range covers.
// The range is closed at the start and open at the end: [Start, End).
int64_t Start, End;
|
||||
|
||||
/// StartPtr - The getelementptr instruction that points to the start of the
|
||||
/// range.
|
||||
Value *StartPtr;
|
||||
|
||||
|
||||
/// Alignment - The known alignment of the first store.
|
||||
unsigned Alignment;
|
||||
|
||||
|
||||
/// TheStores - The actual stores that make up this range.
|
||||
SmallVector<Instruction*, 16> TheStores;
|
||||
|
||||
|
||||
bool isProfitableToUseMemset(const TargetData &TD) const;
|
||||
|
||||
};
|
||||
@ -152,17 +152,17 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
|
||||
|
||||
// If there is nothing to merge, don't do anything.
|
||||
if (TheStores.size() < 2) return false;
|
||||
|
||||
|
||||
// If any of the stores are a memset, then it is always good to extend the
|
||||
// memset.
|
||||
for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
|
||||
if (!isa<StoreInst>(TheStores[i]))
|
||||
return true;
|
||||
|
||||
|
||||
// Assume that the code generator is capable of merging pairs of stores
|
||||
// together if it wants to.
|
||||
if (TheStores.size() == 2) return false;
|
||||
|
||||
|
||||
// If we have fewer than 8 stores, it can still be worthwhile to do this.
|
||||
// For example, merging 4 i8 stores into an i32 store is useful almost always.
|
||||
// However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
|
||||
@ -175,15 +175,15 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
|
||||
// actually reducing the number of stores used.
|
||||
unsigned Bytes = unsigned(End-Start);
|
||||
unsigned NumPointerStores = Bytes/TD.getPointerSize();
|
||||
|
||||
|
||||
// Assume the remaining bytes if any are done a byte at a time.
|
||||
unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
|
||||
|
||||
|
||||
// If we will reduce the # stores (according to this heuristic), do the
|
||||
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
|
||||
// etc.
|
||||
return TheStores.size() > NumPointerStores+NumByteStores;
|
||||
}
|
||||
}
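// A hedged, standalone restatement of the heuristic above, with the store
// count and covered byte span as plain numbers ('PointerSize' stands in for
// TargetData::getPointerSize(); the early exit for ranges that already
// contain a memset is omitted).
static bool profitableToMemsetSketch(unsigned NumStores, unsigned Bytes,
                                     unsigned PointerSize) {
  if (NumStores < 2) return false;      // nothing to merge
  if (NumStores == 2) return false;     // codegen can pair two stores itself
  unsigned NumPointerStores = Bytes / PointerSize;
  unsigned NumByteStores = Bytes - NumPointerStores * PointerSize;
  // Fire only when a memset beats the best wide-store lowering: e.g. 4 x i8
  // over 4 bytes on a 32-bit target (4 > 1 + 0) merges, while 2 x i32 over
  // 8 bytes (2 > 2) does not.
  return NumStores > NumPointerStores + NumByteStores;
}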
|
||||
|
||||
|
||||
namespace {
|
||||
@ -195,12 +195,12 @@ class MemsetRanges {
|
||||
const TargetData &TD;
|
||||
public:
|
||||
MemsetRanges(const TargetData &td) : TD(td) {}
|
||||
|
||||
|
||||
typedef std::list<MemsetRange>::const_iterator const_iterator;
|
||||
const_iterator begin() const { return Ranges.begin(); }
|
||||
const_iterator end() const { return Ranges.end(); }
|
||||
bool empty() const { return Ranges.empty(); }
|
||||
|
||||
|
||||
void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
|
||||
addStore(OffsetFromFirst, SI);
|
||||
@ -210,21 +210,21 @@ public:
|
||||
|
||||
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
|
||||
int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
|
||||
|
||||
|
||||
addRange(OffsetFromFirst, StoreSize,
|
||||
SI->getPointerOperand(), SI->getAlignment(), SI);
|
||||
}
|
||||
|
||||
|
||||
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
|
||||
int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
|
||||
addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
|
||||
}
|
||||
|
||||
|
||||
void addRange(int64_t Start, int64_t Size, Value *Ptr,
|
||||
unsigned Alignment, Instruction *Inst);
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // end anon namespace
|
||||
|
||||
|
||||
@ -240,10 +240,10 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
|
||||
unsigned Alignment, Instruction *Inst) {
|
||||
int64_t End = Start+Size;
|
||||
range_iterator I = Ranges.begin(), E = Ranges.end();
|
||||
|
||||
|
||||
while (I != E && Start > I->End)
|
||||
++I;
|
||||
|
||||
|
||||
// We now know that I == E, in which case we didn't find anything to merge
|
||||
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
|
||||
// to insert a new range. Handle this now.
|
||||
@ -256,18 +256,18 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
|
||||
R.TheStores.push_back(Inst);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// This store overlaps with I, add it.
|
||||
I->TheStores.push_back(Inst);
|
||||
|
||||
|
||||
// At this point, we may have an interval that completely contains our store.
|
||||
// If so, just add it to the interval and return.
|
||||
if (I->Start <= Start && I->End >= End)
|
||||
return;
|
||||
|
||||
|
||||
// Now we know that Start <= I->End and End >= I->Start so the range overlaps
|
||||
// but is not entirely contained within the range.
|
||||
|
||||
|
||||
// See if the range extends the start of the range. In this case, it couldn't
|
||||
// possibly cause it to join the prior range, because otherwise we would have
|
||||
// stopped on *it*.
|
||||
@ -276,7 +276,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
|
||||
I->StartPtr = Ptr;
|
||||
I->Alignment = Alignment;
|
||||
}
|
||||
|
||||
|
||||
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
|
||||
// is in or right at the end of I), and that End >= I->Start. Extend I out to
|
||||
// End.
|
||||
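// A compact sketch of the interval bookkeeping described in addRange above,
// using a std::list of [Start, End) pairs instead of MemsetRange objects: find
// the first interval whose End is not before Start, insert a fresh interval if
// there is no overlap, and otherwise grow the existing one.
#include <algorithm>
#include <list>

struct IntervalSketch { long Start, End; };

static void addRangeSketch(std::list<IntervalSketch> &Ranges,
                           long Start, long Size) {
  long End = Start + Size;
  auto I = Ranges.begin(), E = Ranges.end();
  while (I != E && Start > I->End)
    ++I;                                            // skip intervals entirely before us
  if (I == E || End < I->Start) {
    Ranges.insert(I, IntervalSketch{Start, End});   // disjoint: insert a new interval
    return;
  }
  I->Start = std::min(I->Start, Start);             // overlap: extend the interval
  I->End = std::max(I->End, End);
  // Note: the real pass also folds any following intervals the grown range now
  // reaches, and tracks StartPtr/Alignment; both are omitted here for brevity.
}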
@ -325,7 +325,7 @@ namespace {
|
||||
AU.addPreserved<AliasAnalysis>();
|
||||
AU.addPreserved<MemoryDependenceAnalysis>();
|
||||
}
|
||||
|
||||
|
||||
// Helper fuctions
|
||||
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
|
||||
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
|
||||
@ -341,7 +341,7 @@ namespace {
|
||||
|
||||
bool iterateOnFunction(Function &F);
|
||||
};
|
||||
|
||||
|
||||
char MemCpyOpt::ID = 0;
|
||||
}
|
||||
|
||||
@ -361,16 +361,16 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
|
||||
/// some other patterns to fold away. In particular, this looks for stores to
|
||||
/// neighboring locations of memory. If it sees enough consecutive ones, it
|
||||
/// attempts to merge them together into a memcpy/memset.
|
||||
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
|
||||
if (TD == 0) return 0;
|
||||
|
||||
|
||||
// Okay, so we now have a single store that can be splatable. Scan to find
|
||||
// all subsequent stores of the same value to offset from the same pointer.
|
||||
// Join these together into ranges, so we can decide whether contiguous blocks
|
||||
// are stored.
|
||||
MemsetRanges Ranges(*TD);
|
||||
|
||||
|
||||
BasicBlock::iterator BI = StartInst;
|
||||
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
|
||||
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
|
||||
@ -381,43 +381,43 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
|
||||
// If this is a store, see if we can merge it in.
|
||||
if (!NextStore->isSimple()) break;
|
||||
|
||||
|
||||
// Check to see if this stored value is of the same byte-splattable value.
|
||||
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
|
||||
break;
|
||||
|
||||
|
||||
// Check to see if this store is to a constant offset from the start ptr.
|
||||
int64_t Offset;
|
||||
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
|
||||
Offset, *TD))
|
||||
break;
|
||||
|
||||
|
||||
Ranges.addStore(Offset, NextStore);
|
||||
} else {
|
||||
MemSetInst *MSI = cast<MemSetInst>(BI);
|
||||
|
||||
|
||||
if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
|
||||
!isa<ConstantInt>(MSI->getLength()))
|
||||
break;
|
||||
|
||||
|
||||
// Check to see if this store is to a constant offset from the start ptr.
|
||||
int64_t Offset;
|
||||
if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
|
||||
break;
|
||||
|
||||
|
||||
Ranges.addMemSet(Offset, MSI);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If we have no ranges, then we just had a single store with nothing that
|
||||
// could be merged in. This is a very common case of course.
|
||||
if (Ranges.empty())
|
||||
return 0;
|
||||
|
||||
|
||||
// If we had at least one store that could be merged in, add the starting
|
||||
// store as well. We try to avoid this unless there is at least something
|
||||
// interesting as a small compile-time optimization.
|
||||
@ -434,28 +434,28 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
|
||||
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
|
||||
I != E; ++I) {
|
||||
const MemsetRange &Range = *I;
|
||||
|
||||
|
||||
if (Range.TheStores.size() == 1) continue;
|
||||
|
||||
|
||||
// If it is profitable to lower this range to memset, do so now.
|
||||
if (!Range.isProfitableToUseMemset(*TD))
|
||||
continue;
|
||||
|
||||
|
||||
// Otherwise, we do want to transform this! Create a new memset.
|
||||
// Get the starting pointer of the block.
|
||||
StartPtr = Range.StartPtr;
|
||||
|
||||
|
||||
// Determine alignment
|
||||
unsigned Alignment = Range.Alignment;
|
||||
if (Alignment == 0) {
|
||||
Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}

AMemSet =
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
|
||||
|
||||
|
||||
DEBUG(dbgs() << "Replace stores:\n";
|
||||
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
|
||||
dbgs() << *Range.TheStores[i] << '\n';
|
||||
@ -473,14 +473,14 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
|
||||
}
|
||||
++NumMemSetInfer;
|
||||
}
|
||||
|
||||
|
||||
return AMemSet;
|
||||
}
|
||||
|
||||
|
||||
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
if (!SI->isSimple()) return false;
|
||||
|
||||
|
||||
if (TD == 0) return false;
|
||||
|
||||
// Detect cases where we're performing call slot forwarding, but
|
||||
@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
|
||||
if (C) {
|
||||
bool changed = performCallSlotOptzn(LI,
|
||||
SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
|
||||
TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
|
||||
if (changed) {
|
||||
@ -524,10 +524,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// There are two cases that are interesting for this code to handle: memcpy
|
||||
// and memset. Right now we only handle memset.
|
||||
|
||||
|
||||
// Ensure that the value being stored is something that can be memset'able a
|
||||
// byte at a time like "0" or "-1" or any width, as well as things like
|
||||
// 0xA0A0A0A0 and 0.0.
|
||||
@ -537,7 +537,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
BBI = I; // Don't invalidate iterator.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -680,7 +680,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
|
||||
if (CS.getArgument(i)->getType() == cpyDest->getType())
|
||||
CS.setArgument(i, cpyDest);
|
||||
else
|
||||
CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
CS.getArgument(i)->getType(), cpyDest->getName(), C));
|
||||
}
|
||||
|
||||
@ -701,14 +701,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
|
||||
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
|
||||
/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
|
||||
/// copy from MDep's input if we can. MSize is the size of M's copy.
|
||||
///
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
uint64_t MSize) {
|
||||
// We can only transforms memcpy's where the dest of one is the source of the
|
||||
// other.
|
||||
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
|
||||
return false;
|
||||
|
||||
|
||||
// If dep instruction is reading from our current input, then it is a noop
|
||||
// transfer and substituting the input won't change this instruction. Just
|
||||
// ignore the input and let someone else zap MDep. This handles cases like:
|
||||
@ -716,14 +716,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
// memcpy(b <- a)
|
||||
if (M->getSource() == MDep->getSource())
|
||||
return false;
|
||||
|
||||
|
||||
// Second, the length of the memcpy's must be the same, or the preceding one
|
||||
// must be larger than the following one.
|
||||
ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
|
||||
ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
|
||||
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
|
||||
return false;
|
||||
|
||||
|
||||
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
|
||||
|
||||
// Verify that the copied-from memory doesn't change in between the two
|
||||
@ -743,23 +743,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
|
||||
false, M, M->getParent());
|
||||
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
|
||||
return false;
|
||||
|
||||
|
||||
// If the dest of the second might alias the source of the first, then the
|
||||
// source and dest might overlap. We still want to eliminate the intermediate
|
||||
// value, but we have to generate a memmove instead of memcpy.
|
||||
bool UseMemMove = false;
|
||||
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
|
||||
UseMemMove = true;
|
||||
|
||||
|
||||
// If all checks passed, then we can transform M.
|
||||
|
||||
|
||||
// Make sure to use the lesser of the alignment of the source and the dest
|
||||
// since we're changing where we're reading from, but don't want to increase
|
||||
// the alignment past what can be read from or written to.
|
||||
// TODO: Is this worth it if we're creating a less aligned memcpy? For
|
||||
// example we could be moving from movaps -> movq on x86.
|
||||
unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
|
||||
|
||||
|
||||
IRBuilder<> Builder(M);
|
||||
if (UseMemMove)
|
||||
Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
|
||||
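// A toy model of the memcpy-forwarding decision discussed above: given
// memcpy(B <- A, DepLen) followed by memcpy(C <- B, Len), the second copy can
// read from A directly when the first copy is at least as long and B's source
// was not rewritten in between. The struct is a hypothetical stand-in for the
// handful of facts the pass actually queries from its analyses.
struct CopyFacts {
  bool SameSource;        // M->getSource() == MDep->getSource()
  bool DepIsVolatile;
  long DepLen, Len;       // -1 when the length is not a constant
  bool SourceUnchanged;   // no clobber of A between the two copies
  bool DestAliasesSource; // C may alias A: must use memmove, not memcpy
};

enum ForwardKind { NoForward, ForwardAsMemcpy, ForwardAsMemmove };

static ForwardKind classifyForwarding(const CopyFacts &F) {
  if (F.SameSource || F.DepIsVolatile) return NoForward;
  if (F.DepLen < 0 || F.Len < 0 || F.DepLen < F.Len) return NoForward;
  if (!F.SourceUnchanged) return NoForward;
  return F.DestAliasesSource ? ForwardAsMemmove : ForwardAsMemcpy;
}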
@ -839,13 +839,13 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
|
||||
|
||||
if (!TLI->has(LibFunc::memmove))
|
||||
return false;
|
||||
|
||||
|
||||
// See if the pointers alias.
|
||||
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
|
||||
return false;
|
||||
|
||||
|
||||
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
|
||||
|
||||
|
||||
// If not, then we know we can transform this.
|
||||
Module *Mod = M->getParent()->getParent()->getParent();
|
||||
Type *ArgTys[3] = { M->getRawDest()->getType(),
|
||||
@ -861,7 +861,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
|
||||
++NumMoveToCpy;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// processByValArgument - This is called on every byval argument in call sites.
|
||||
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
|
||||
if (TD == 0) return false;
|
||||
@ -884,7 +884,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
|
||||
if (MDep == 0 || MDep->isVolatile() ||
|
||||
ByValArg->stripPointerCasts() != MDep->getDest())
|
||||
return false;
|
||||
|
||||
|
||||
// The length of the memcpy must be larger or equal to the size of the byval.
|
||||
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
|
||||
if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
|
||||
@ -894,13 +894,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
|
||||
// then it is some target specific value that we can't know.
|
||||
unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
|
||||
if (ByValAlign == 0) return false;
|
||||
|
||||
|
||||
// If it is greater than the memcpy, then we check to see if we can force the
|
||||
// source of the memcpy to the alignment we need. If we fail, we bail out.
|
||||
if (MDep->getAlignment() < ByValAlign &&
|
||||
getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
|
||||
return false;
|
||||
|
||||
|
||||
// Verify that the copied-from memory doesn't change in between the memcpy and
|
||||
// the byval call.
|
||||
// memcpy(a <- b)
|
||||
@ -915,16 +915,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
|
||||
false, CS.getInstruction(), MDep->getParent());
|
||||
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
|
||||
return false;
|
||||
|
||||
|
||||
Value *TmpCast = MDep->getSource();
|
||||
if (MDep->getSource()->getType() != ByValArg->getType())
|
||||
TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
|
||||
"tmpcast", CS.getInstruction());
|
||||
|
||||
|
||||
DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
|
||||
<< " " << *MDep << "\n"
|
||||
<< " " << *CS.getInstruction() << "\n");
|
||||
|
||||
|
||||
// Otherwise we're good! Update the byval argument.
|
||||
CS.setArgument(ArgNo, TmpCast);
|
||||
++NumMemCpyInstr;
|
||||
@ -940,9 +940,9 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
|
||||
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
|
||||
// Avoid invalidating the iterator.
|
||||
Instruction *I = BI++;
|
||||
|
||||
|
||||
bool RepeatInstruction = false;
|
||||
|
||||
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
||||
MadeChange |= processStore(SI, BI);
|
||||
else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
|
||||
@ -964,7 +964,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
@ -976,19 +976,19 @@ bool MemCpyOpt::runOnFunction(Function &F) {
|
||||
MD = &getAnalysis<MemoryDependenceAnalysis>();
|
||||
TD = getAnalysisIfAvailable<TargetData>();
|
||||
TLI = &getAnalysis<TargetLibraryInfo>();
|
||||
|
||||
|
||||
// If we don't have at least memset and memcpy, there is little point of doing
|
||||
// anything here. These are required by a freestanding implementation, so if
|
||||
// even they are disabled, there is no point in trying hard.
|
||||
if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
|
||||
return false;
|
||||
|
||||
|
||||
while (1) {
|
||||
if (!iterateOnFunction(F))
|
||||
break;
|
||||
MadeChange = true;
|
||||
}
|
||||
|
||||
|
||||
MD = 0;
|
||||
return MadeChange;
|
||||
}
|
||||
|
@ -375,7 +375,7 @@ typedef std::pair<Value*, APInt> RepeatedValue;
|
||||
/// nodes in Ops along with their weights (how many times the leaf occurs). The
|
||||
/// original expression is the same as
|
||||
/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
|
||||
/// op
/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
|
||||
/// op
|
||||
/// ...
|
||||
|
@ -59,7 +59,7 @@ namespace {
|
||||
virtual bool runOnFunction(Function &F);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
char RegToMem::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
|
||||
false, false)
|
||||
@ -68,25 +68,25 @@ INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
|
||||
false, false)
|
||||
|
||||
bool RegToMem::runOnFunction(Function &F) {
|
||||
if (F.isDeclaration())
return false;
|
||||
|
||||
|
||||
// Insert all new allocas into entry block.
|
||||
BasicBlock *BBEntry = &F.getEntryBlock();
|
||||
assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
|
||||
"Entry block to function must not have predecessors!");
|
||||
|
||||
|
||||
// Find the first non-alloca instruction and create an insertion point. This is
// safe if the block is well-formed: it always has a terminator; otherwise
// we'll get an assertion.
|
||||
BasicBlock::iterator I = BBEntry->begin();
|
||||
while (isa<AllocaInst>(I)) ++I;
|
||||
|
||||
|
||||
CastInst *AllocaInsertionPoint =
|
||||
new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
|
||||
Type::getInt32Ty(F.getContext()),
|
||||
"reg2mem alloca point", I);
|
||||
|
||||
|
||||
// Find the escaped instructions. But don't create stack slots for
|
||||
// allocas in entry block.
|
||||
std::list<Instruction*> WorkList;
|
||||
@ -99,15 +99,15 @@ bool RegToMem::runOnFunction(Function &F) {
|
||||
WorkList.push_front(&*iib);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Demote escaped instructions
|
||||
NumRegsDemoted += WorkList.size();
|
||||
for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
ile = WorkList.end(); ilb != ile; ++ilb)
|
||||
DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
|
||||
|
||||
|
||||
WorkList.clear();
|
||||
|
||||
|
||||
// Find all phi's
|
||||
for (Function::iterator ibb = F.begin(), ibe = F.end();
|
||||
ibb != ibe; ++ibb)
|
||||
@ -115,19 +115,18 @@ bool RegToMem::runOnFunction(Function &F) {
|
||||
iib != iie; ++iib)
|
||||
if (isa<PHINode>(iib))
|
||||
WorkList.push_front(&*iib);
|
||||
|
||||
|
||||
// Demote phi nodes
|
||||
NumPhisDemoted += WorkList.size();
|
||||
for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
ile = WorkList.end(); ilb != ile; ++ilb)
|
||||
DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
|
||||
//
|
||||
char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
|
||||
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
|
||||
return new RegToMem();
|
||||
|
@ -409,7 +409,7 @@ private:
|
||||
|
||||
if (Constant *C = dyn_cast<Constant>(V)) {
|
||||
Constant *Elt = C->getAggregateElement(i);
|
||||
|
||||
|
||||
if (Elt == 0)
|
||||
LV.markOverdefined(); // Unknown sort of constant.
|
||||
else if (isa<UndefValue>(Elt))
|
||||
|
@ -7,7 +7,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements common infrastructure for libLLVMScalarOpts.a, which
// implements several scalar transformations over the LLVM intermediate
|
||||
// representation, including the C bindings for that library.
|
||||
//
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// initializeScalarOptsPasses - Initialize all passes linked into the
/// ScalarOpts library.
|
||||
void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
||||
initializeADCEPass(Registry);
|
||||
|
@ -104,7 +104,7 @@ namespace {
|
||||
/// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
|
||||
/// looping and avoid redundant work.
|
||||
SmallPtrSet<PHINode*, 8> CheckedPHIs;
|
||||
|
||||
|
||||
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
|
||||
bool isUnsafe : 1;
|
||||
|
||||
@ -118,12 +118,12 @@ namespace {
|
||||
/// ever accessed, or false if the alloca is only accessed with mem
|
||||
/// intrinsics or load/store that only access the entire alloca at once.
|
||||
bool hasSubelementAccess : 1;
|
||||
|
||||
|
||||
/// hasALoadOrStore - This is true if there are any loads or stores to it.
|
||||
/// The alloca may just be accessed with memcpy, for example, which would
|
||||
/// not set this.
|
||||
bool hasALoadOrStore : 1;
|
||||
|
||||
|
||||
explicit AllocaInfo(AllocaInst *ai)
|
||||
: AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
|
||||
hasSubelementAccess(false), hasALoadOrStore(false) {}
|
||||
@ -187,7 +187,7 @@ namespace {
|
||||
static MemTransferInst *isOnlyCopiedFromConstantGlobal(
|
||||
AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
|
||||
};
|
||||
|
||||
|
||||
// SROA_DT - SROA that uses DominatorTree.
|
||||
struct SROA_DT : public SROA {
|
||||
static char ID;
|
||||
@ -196,7 +196,7 @@ namespace {
|
||||
SROA(T, true, ID, ST, AT, SLT) {
|
||||
initializeSROA_DTPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
||||
// getAnalysisUsage - This pass does not require any passes, but we know it
|
||||
// will not alter the CFG, so say so.
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
@ -204,7 +204,7 @@ namespace {
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// SROA_SSAUp - SROA that uses SSAUpdater.
|
||||
struct SROA_SSAUp : public SROA {
|
||||
static char ID;
|
||||
@ -213,14 +213,14 @@ namespace {
|
||||
SROA(T, false, ID, ST, AT, SLT) {
|
||||
initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
||||
// getAnalysisUsage - This pass does not require any passes, but we know it
|
||||
// will not alter the CFG, so say so.
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
char SROA_DT::ID = 0;
|
||||
@ -294,7 +294,7 @@ class ConvertToScalarInfo {
|
||||
/// isn't possible to turn into a vector type, it gets set to VoidTy.
|
||||
VectorType *VectorTy;
|
||||
|
||||
/// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
|
||||
/// large integers unless there is some potential for optimization.
|
||||
bool HadNonMemTransferAccess;
|
||||
@ -1050,7 +1050,7 @@ public:
|
||||
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
|
||||
DIBuilder *DB)
|
||||
: LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
|
||||
|
||||
|
||||
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
|
||||
// Remember which alloca we're promoting (for isInstInList).
|
||||
this->AI = AI;
|
||||
@ -1065,18 +1065,18 @@ public:
|
||||
|
||||
LoadAndStorePromoter::run(Insts);
|
||||
AI->eraseFromParent();
|
||||
for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
|
||||
for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
|
||||
E = DDIs.end(); I != E; ++I) {
|
||||
DbgDeclareInst *DDI = *I;
|
||||
DDI->eraseFromParent();
|
||||
}
|
||||
for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
|
||||
for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
|
||||
E = DVIs.end(); I != E; ++I) {
|
||||
DbgValueInst *DVI = *I;
|
||||
DVI->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
virtual bool isInstInList(Instruction *I,
|
||||
const SmallVectorImpl<Instruction*> &Insts) const {
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(I))
|
||||
@ -1085,7 +1085,7 @@ public:
|
||||
}
|
||||
|
||||
virtual void updateDebugInfo(Instruction *Inst) const {
|
||||
for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
|
||||
for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
|
||||
E = DDIs.end(); I != E; ++I) {
|
||||
DbgDeclareInst *DDI = *I;
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
|
||||
@ -1093,7 +1093,7 @@ public:
|
||||
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
|
||||
ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
|
||||
}
|
||||
for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
|
||||
for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
|
||||
E = DVIs.end(); I != E; ++I) {
|
||||
DbgValueInst *DVI = *I;
|
||||
Value *Arg = NULL;
|
||||
@ -1136,12 +1136,12 @@ public:
|
||||
static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
|
||||
bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
|
||||
bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
|
||||
|
||||
|
||||
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
|
||||
UI != UE; ++UI) {
|
||||
LoadInst *LI = dyn_cast<LoadInst>(*UI);
|
||||
if (LI == 0 || !LI->isSimple()) return false;
|
||||
|
||||
|
||||
// Both operands to the select need to be dereferencable, either absolutely
|
||||
// (e.g. allocas) or at this point because we can see other accesses to it.
|
||||
if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
|
||||
@ -1151,7 +1151,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
|
||||
LI->getAlignment(), TD))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1182,20 +1182,20 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
|
||||
UI != UE; ++UI) {
|
||||
LoadInst *LI = dyn_cast<LoadInst>(*UI);
|
||||
if (LI == 0 || !LI->isSimple()) return false;
|
||||
|
||||
|
||||
// For now we only allow loads in the same block as the PHI. This is a
|
||||
// common case that happens when instcombine merges two loads through a PHI.
|
||||
if (LI->getParent() != BB) return false;
|
||||
|
||||
|
||||
// Ensure that there are no instructions between the PHI and the load that
|
||||
// could store.
|
||||
for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
|
||||
if (BBI->mayWriteToMemory())
|
||||
return false;
|
||||
|
||||
|
||||
MaxAlign = std::max(MaxAlign, LI->getAlignment());
|
||||
}
|
||||
|
||||
|
||||
// Okay, we know that we have one or more loads in the same block as the PHI.
|
||||
// We can transform this if it is safe to push the loads into the predecessor
|
||||
// blocks. The only thing to watch out for is that we can't put a possibly
|
||||
@ -1223,10 +1223,10 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
|
||||
if (InVal->isDereferenceablePointer() ||
|
||||
isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
|
||||
continue;
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1238,7 +1238,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
|
||||
static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
SetVector<Instruction*, SmallVector<Instruction*, 4>,
|
||||
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
|
||||
|
||||
|
||||
for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
|
||||
UI != UE; ++UI) {
|
||||
User *U = *UI;
|
||||
@ -1247,7 +1247,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
return false;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
|
||||
if (SI->getOperand(0) == AI || !SI->isSimple())
|
||||
return false; // Don't allow a store OF the AI, only INTO the AI.
|
||||
@ -1261,7 +1261,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
Value *Result = SI->getOperand(1+CI->isZero());
|
||||
SI->replaceAllUsesWith(Result);
|
||||
SI->eraseFromParent();
|
||||
|
||||
|
||||
// This is very rare and we just scrambled the use list of AI, start
|
||||
// over completely.
|
||||
return tryToMakeAllocaBePromotable(AI, TD);
|
||||
@ -1271,33 +1271,33 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
// loads, then we can transform this by rewriting the select.
|
||||
if (!isSafeSelectToSpeculate(SI, TD))
|
||||
return false;
|
||||
|
||||
|
||||
InstsToRewrite.insert(SI);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (PHINode *PN = dyn_cast<PHINode>(U)) {
|
||||
if (PN->use_empty()) { // Dead PHIs can be stripped.
|
||||
InstsToRewrite.insert(PN);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
|
||||
// in the pred blocks, then we can transform this by rewriting the PHI.
|
||||
if (!isSafePHIToSpeculate(PN, TD))
|
||||
return false;
|
||||
|
||||
|
||||
InstsToRewrite.insert(PN);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
|
||||
if (onlyUsedByLifetimeMarkers(BCI)) {
|
||||
InstsToRewrite.insert(BCI);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1305,7 +1305,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
// we're done!
|
||||
if (InstsToRewrite.empty())
|
||||
return true;
|
||||
|
||||
|
||||
// If we have instructions that need to be rewritten for this to be promotable
|
||||
// take care of it now.
|
||||
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
|
||||
@ -1326,13 +1326,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
// loads with a new select.
|
||||
while (!SI->use_empty()) {
|
||||
LoadInst *LI = cast<LoadInst>(SI->use_back());
|
||||
|
||||
|
||||
IRBuilder<> Builder(LI);
|
||||
LoadInst *TrueLoad =
Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
LoadInst *FalseLoad =
Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
|
||||
|
||||
|
||||
// Transfer alignment and TBAA info if present.
|
||||
TrueLoad->setAlignment(LI->getAlignment());
|
||||
FalseLoad->setAlignment(LI->getAlignment());
|
||||
@ -1340,18 +1340,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
|
||||
FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
|
||||
}
|
||||
|
||||
|
||||
Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
|
||||
V->takeName(LI);
|
||||
LI->replaceAllUsesWith(V);
|
||||
LI->eraseFromParent();
|
||||
}
|
||||
|
||||
|
||||
// Now that all the loads are gone, the select is gone too.
|
||||
SI->eraseFromParent();
|
||||
continue;
|
||||
}
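// The rewrite performed above, shown on plain C++ values rather than IR: a
// load through a select becomes a select of two speculated loads. This is a
// sketch of the semantics only; it is legal precisely because
// isSafeSelectToSpeculate has verified that both pointers are dereferenceable.
static int loadOfSelect(bool Cond, int *A, int *B) {
  return *(Cond ? A : B);           // before: load (select Cond, A, B)
}

static int selectOfLoads(bool Cond, int *A, int *B) {
  int TrueVal = *A;                 // speculated load of the true operand
  int FalseVal = *B;                // speculated load of the false operand
  return Cond ? TrueVal : FalseVal; // after: select Cond, TrueVal, FalseVal
}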
|
||||
|
||||
|
||||
// Otherwise, we have a PHI node which allows us to push the loads into the
|
||||
// predecessors.
|
||||
PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
|
||||
@ -1359,7 +1359,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
PN->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
|
||||
PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
|
||||
PN->getName()+".ld", PN);
|
||||
@ -1369,18 +1369,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
|
||||
LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
|
||||
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
|
||||
unsigned Align = SomeLoad->getAlignment();
|
||||
|
||||
|
||||
// Rewrite all loads of the PN to use the new PHI.
|
||||
while (!PN->use_empty()) {
|
||||
LoadInst *LI = cast<LoadInst>(PN->use_back());
|
||||
LI->replaceAllUsesWith(NewPN);
|
||||
LI->eraseFromParent();
|
||||
}
|
||||
|
||||
|
||||
// Inject loads into all of the pred blocks. Keep track of which blocks we
|
||||
// insert them into in case we have multiple edges from the same block.
|
||||
DenseMap<BasicBlock*, LoadInst*> InsertedLoads;

for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
LoadInst *&Load = InsertedLoads[Pred];
@ -1391,13 +1391,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
Load->setAlignment(Align);
if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
}

NewPN->addIncoming(Load, Pred);
}

PN->eraseFromParent();
}

++NumAdjusted;
return true;
}
@ -1430,7 +1430,7 @@ bool SROA::performPromotion(Function &F) {
SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
AllocaInst *AI = Allocas[i];

// Build list of instructions to promote.
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
@ -1667,12 +1667,12 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;

} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);

Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
@ -1689,7 +1689,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
if (Info.isUnsafe) return;
}
}

/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
/// derived from the alloca, we can often still split the alloca into elements.
@ -1706,10 +1706,10 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (PHINode *PN = dyn_cast<PHINode>(I))
if (!Info.CheckedPHIs.insert(PN))
return;

for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);

if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
isSafePHISelectUseForScalarRepl(BC, Offset, Info);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
@ -1726,12 +1726,12 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;

} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);

Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
@ -1925,12 +1925,12 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
RewriteBitCast(BC, AI, Offset, NewElts);
continue;
}

if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
RewriteGEP(GEPI, AI, Offset, NewElts);
continue;
}

if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
@ -1949,10 +1949,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}

if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
Type *LIType = LI->getType();

if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
//   %res = load { i32, i32 }* %alloc
@ -1978,7 +1978,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}

if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
Type *SIType = Val->getType();
@ -2005,16 +2005,16 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
continue;
}

if (isa<SelectInst>(User) || isa<PHINode>(User)) {
// If we have a PHI user of the alloca itself (as opposed to a GEP or
// If we have a PHI user of the alloca itself (as opposed to a GEP or
// bitcast) we have to rewrite it.  GEP and bitcast uses will be RAUW'd to
// the new pointer.
if (!isa<AllocaInst>(I)) continue;

assert(Offset == 0 && NewElts[0] &&
"Direct alloca use should have a zero offset");

// If we have a use of the alloca, we know the derived uses will be
// utilizing just the first element of the scalarized result.  Insert a
// bitcast of the first alloca before the user as required.
@ -2386,7 +2386,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

IRBuilder<> Builder(SI);

// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
@ -2648,7 +2648,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return false;
}
}

return true;
}
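
The SROA hunks above deal with rewriting loads, stores, GEPs and PHI uses of an aggregate alloca so that the aggregate can be split into independent scalar allocas and then promoted. A minimal standalone sketch of what that splitting buys, written in plain C++ with made-up names rather than LLVM IR, is the pair of functions below: the second is the hand-scalarized form of the first.

  // Hypothetical before/after view of the effect of scalar replacement.
  #include <cstdio>

  struct Pair { int a; int b; };

  // Before: the aggregate lives in one stack slot; every access goes through it.
  static int sumAggregate(int x, int y) {
    Pair p;          // one alloca of an aggregate type
    p.a = x;
    p.b = y;
    return p.a + p.b;
  }

  // After: the aggregate is split into independent scalars ("NewElts" in the
  // pass), each of which can be promoted to a register on its own.
  static int sumScalarized(int x, int y) {
    int p_a = x;     // element 0 of the former aggregate
    int p_b = y;     // element 1 of the former aggregate
    return p_a + p_b;
  }

  int main() {
    std::printf("%d %d\n", sumAggregate(2, 3), sumScalarized(2, 3));
    return 0;
  }
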
@ -67,7 +67,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
// nodes.
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
(*SI)->removePredecessor(BB);

// Insert a call to llvm.trap right before this.  This turns the undefined
// behavior into a hard fail instead of falling through into random code.
if (UseLLVMTrap) {
@ -77,7 +77,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
CallTrap->setDebugLoc(I->getDebugLoc());
}
new UnreachableInst(I->getContext(), I);

// All instructions after this are dead.
BasicBlock::iterator BBI = I, BBE = BB->end();
while (BBI != BBE) {
@ -107,13 +107,13 @@ static void ChangeToCall(InvokeInst *II) {

static bool MarkAliveBlocks(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 128> &Reachable) {

SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
bool Changed = false;
do {
BB = Worklist.pop_back_val();

if (!Reachable.insert(BB))
continue;

@ -135,7 +135,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
break;
}
}

// Store to undef and store to null are undefined and used to signal that
// they should be changed to unreachable by passes that can't modify the
// CFG.
@ -144,7 +144,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (SI->isVolatile()) continue;

Value *Ptr = SI->getOperand(1);

if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
@ -180,38 +180,38 @@ static bool MarkAliveBlocks(BasicBlock *BB,
return Changed;
}

/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle.  Return true if a change was made, false
/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle.  Return true if a change was made, false
/// otherwise.
static bool RemoveUnreachableBlocksFromFn(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
bool Changed = MarkAliveBlocks(F.begin(), Reachable);

// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
return Changed;

assert(Reachable.size() < F.size());
NumSimpl += F.size()-Reachable.size();

// Loop over all of the basic blocks that are not reachable, dropping all of
// their internal references...
for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
if (Reachable.count(BB))
continue;

for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(BB);
BB->dropAllReferences();
}

for (Function::iterator I = ++F.begin(); I != F.end();)
if (!Reachable.count(I))
I = F.getBasicBlockList().erase(I);
else
++I;

return true;
}
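
MarkAliveBlocks and RemoveUnreachableBlocksFromFn above are a standard worklist reachability sweep: mark every block reachable from the entry, then drop references from and erase everything else, which also kills dead cycles. A minimal sketch of the same idea over a toy adjacency-list CFG (plain C++ containers and invented block numbers, not LLVM's data structures) follows.

  #include <cstdio>
  #include <set>
  #include <vector>

  // Toy CFG: block i has edges to Succs[i]. Block 0 is the entry block.
  static std::set<int> markAlive(const std::vector<std::vector<int>> &Succs) {
    std::set<int> Reachable;
    std::vector<int> Worklist;
    Worklist.push_back(0);
    while (!Worklist.empty()) {
      int BB = Worklist.back();
      Worklist.pop_back();
      if (!Reachable.insert(BB).second)   // already visited
        continue;
      for (int S : Succs[BB])             // enqueue successors
        Worklist.push_back(S);
    }
    return Reachable;
  }

  int main() {
    // Blocks 0-4; blocks 3 and 4 form a dead cycle that nothing reaches.
    std::vector<std::vector<int>> Succs = {{1}, {2}, {}, {4}, {3}};
    std::set<int> Reachable = markAlive(Succs);
    for (int BB = 0; BB < (int)Succs.size(); ++BB)
      if (!Reachable.count(BB))
        std::printf("block %d is unreachable and would be erased\n", BB);
    return 0;
  }
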
@ -219,17 +219,17 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
/// node) return blocks, merge them together to promote recursive block merging.
static bool MergeEmptyReturnBlocks(Function &F) {
bool Changed = false;

BasicBlock *RetBlock = 0;

// Scan all the blocks in the function, looking for empty return blocks.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
BasicBlock &BB = *BBI++;

// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
if (Ret == 0) continue;

// Only look at the block if it is empty or the only other thing in it is a
// single PHI node that is the operand to the return.
if (Ret != &BB.front()) {
@ -251,21 +251,21 @@ static bool MergeEmptyReturnBlocks(Function &F) {
RetBlock = &BB;
continue;
}

// Otherwise, we found a duplicate return block.  Merge the two.
Changed = true;

// Case when there is no input to the return or when the returned values
// agree is trivial.  Note that they can't agree if there are phis in the
// blocks.
if (Ret->getNumOperands() == 0 ||
Ret->getOperand(0) ==
Ret->getOperand(0) ==
cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
BB.replaceAllUsesWith(RetBlock);
BB.eraseFromParent();
continue;
}

// If the canonical return block has no PHI node, create one now.
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
if (RetBlockPHI == 0) {
@ -274,12 +274,12 @@ static bool MergeEmptyReturnBlocks(Function &F) {
RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
std::distance(PB, PE), "merge",
&RetBlock->front());

for (pred_iterator PI = PB; PI != PE; ++PI)
RetBlockPHI->addIncoming(InVal, *PI);
RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
}

// Turn BB into a block that just unconditionally branches to the return
// block.  This handles the case when the two return blocks have a common
// predecessor but that return different things.
@ -287,7 +287,7 @@ static bool MergeEmptyReturnBlocks(Function &F) {
BB.getTerminator()->eraseFromParent();
BranchInst::Create(RetBlock, &BB);
}

return Changed;
}

@ -298,7 +298,7 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;

// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
@ -327,7 +327,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
// IterativeSimplifyCFG can (rarely) make some loops dead.  If this happens,
// RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
// iterate between the two optimizations.  We structure the code like this to
// avoid reruning IterativeSimplifyCFG if the second pass of
// avoid reruning IterativeSimplifyCFG if the second pass of
// RemoveUnreachableBlocksFromFn doesn't do anything.
if (!RemoveUnreachableBlocksFromFn(F))
return true;
@ -100,7 +100,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
return true;
}

static bool CallHasFloatingPointArgument(const CallInst *CI) {
for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
it != e; ++it) {
@ -256,7 +256,7 @@ struct StrChrOpt : public LibCallOptimization {
ConstantInt::get(TD->getIntPtrType(*Context), Len),
B, TD);
}

// Otherwise, the character is a constant, see if the first argument is
// a string literal.  If so, we can constant fold.
StringRef Str;
@ -1514,7 +1514,7 @@ namespace {
///
class SimplifyLibCalls : public FunctionPass {
TargetLibraryInfo *TLI;

StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
@ -1534,7 +1534,7 @@ namespace {
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
PutsOpt Puts;

bool Modified;  // This is only used by doInitialization.
public:
static char ID; // Pass identification
@ -1767,7 +1767,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {

void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
FunctionType *FTy = F.getFunctionType();

StringRef Name = F.getName();
switch (Name[0]) {
case 's':
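
SimplifyLibCalls above keeps one optimization object per recognized library call in a StringMap keyed by the callee's name, and inferPrototypeAttributes switches on Name[0] as a cheap first-character filter before comparing full names. A rough sketch of that dispatch pattern using standard containers and invented callback names (not the pass's actual API) follows.

  #include <cstdio>
  #include <functional>
  #include <string>
  #include <unordered_map>

  int main() {
    // Name -> rewrite callback, standing in for StringMap<LibCallOptimization*>.
    std::unordered_map<std::string, std::function<const char *()>> Optimizations;
    Optimizations["strlen"] = [] { return "fold strlen of a constant string"; };
    Optimizations["memcpy"] = [] { return "lower small memcpy to loads/stores"; };

    const char *Callee = "strlen";
    // Cheap first-character filter before the map lookup, like switch (Name[0]).
    if (Callee[0] == 's' || Callee[0] == 'm') {
      auto It = Optimizations.find(Callee);
      if (It != Optimizations.end())
        std::printf("%s: %s\n", Callee, It->second());
    }
    return 0;
  }
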
@ -40,9 +40,9 @@ namespace {
Sinking() : FunctionPass(ID) {
initializeSinkingPass(*PassRegistry::getPassRegistry());
}

virtual bool runOnFunction(Function &F);

virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
@ -59,7 +59,7 @@ namespace {
bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
};
} // end anonymous namespace

char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@ -71,7 +71,7 @@ FunctionPass *llvm::createSinkingPass() { return new Sinking(); }

/// AllUsesDominatedByBlock - Return true if all uses of the specified value
/// occur in blocks dominated by the specified block.
bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
BasicBlock *BB) const {
// Ignoring debug uses is necessary so debug info doesn't affect the code.
// This may leave a referencing dbg_value in the original block, before
@ -101,18 +101,18 @@ bool Sinking::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();

bool MadeChange, EverMadeChange = false;

do {
MadeChange = false;
DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
// Process all basic blocks.
for (Function::iterator I = F.begin(), E = F.end();
for (Function::iterator I = F.begin(), E = F.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
EverMadeChange |= MadeChange;
NumSinkIter++;
} while (MadeChange);

return EverMadeChange;
}

@ -121,8 +121,8 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;

// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an unreachable
// loop there may be nowhere to stop.
// unprofitable, it can also lead to infinite looping, because in an
// unreachable loop there may be nowhere to stop.
if (!DT->isReachableFromEntry(&BB)) return false;

bool MadeChange = false;
@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
SmallPtrSet<Instruction *, 8> Stores;
do {
Instruction *Inst = I;  // The instruction to sink.

// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
ProcessedBegin = I == BB.begin();
@ -146,10 +146,10 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {

if (SinkInstruction(Inst, Stores))
++NumSunk, MadeChange = true;

// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);

return MadeChange;
}
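
ProcessBlock above walks a basic block backwards from its end and pre-decrements the iterator before acting on the current instruction, so that sinking the instruction out of the block cannot invalidate the iterator being walked. The same idiom in a self-contained form, using a std::list as a stand-in for the instruction list and an arbitrary "sink" condition, follows.

  #include <cstdio>
  #include <list>

  int main() {
    std::list<int> Block = {1, 2, 3, 4, 5};   // stand-in for a basic block
    std::list<int> Elsewhere;                 // stand-in for the sink target

    // Start at the last element (the block is known to be non-empty).
    std::list<int>::iterator I = --Block.end();
    bool ProcessedBegin = false;
    do {
      std::list<int>::iterator Inst = I;      // the element to process
      // Pre-decrement I (unless at begin) so moving *Inst can't invalidate it.
      ProcessedBegin = (I == Block.begin());
      if (!ProcessedBegin)
        --I;
      if (*Inst % 2 == 0)                     // "sink" the even elements
        Elsewhere.splice(Elsewhere.begin(), Block, Inst);
    } while (!ProcessedBegin);

    for (int V : Block) std::printf("kept %d\n", V);
    for (int V : Elsewhere) std::printf("sunk %d\n", V);
    return 0;
  }
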
@ -177,16 +177,17 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,

/// IsAcceptableTarget - Return true if it is possible to sink the instruction
/// in the specified basic block.
bool Sinking::IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const {
bool Sinking::IsAcceptableTarget(Instruction *Inst,
BasicBlock *SuccToSinkTo) const {
assert(Inst && "Instruction to be sunk is null");
assert(SuccToSinkTo && "Candidate sink target is null");

// It is not possible to sink an instruction into its own block.  This can
// happen with loops.
if (Inst->getParent() == SuccToSinkTo)
return false;

// If the block has multiple predecessors, this would introduce computation

// If the block has multiple predecessors, this would introduce computation
// on different code paths.  We could split the critical edge, but for now we
// just punt.
// FIXME: Split critical edges if not backedges.
@ -195,18 +196,19 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) co
// other code paths.
if (!isSafeToSpeculativelyExecute(Inst))
return false;

// We don't want to sink across a critical edge if we don't dominate the
// successor. We could be introducing calculations to new code paths.
if (!DT->dominates(Inst->getParent(), SuccToSinkTo))
return false;

// Don't sink instructions into a loop.
Loop *succ = LI->getLoopFor(SuccToSinkTo), *cur = LI->getLoopFor(Inst->getParent());
Loop *succ = LI->getLoopFor(SuccToSinkTo);
Loop *cur = LI->getLoopFor(Inst->getParent());
if (succ != 0 && succ != cur)
return false;
}

// Finally, check that all the uses of the instruction are actually
// dominated by the candidate
return AllUsesDominatedByBlock(Inst, SuccToSinkTo);
@ -219,7 +221,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// Check if it's safe to move the instruction.
if (!isSafeToMove(Inst, AA, Stores))
return false;

// FIXME: This should include support for sinking instructions within the
// block they are currently in to shorten the live ranges.  We often get
// instructions sunk into the top of a large block, but it would be better to
@ -227,41 +229,41 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// be careful not to *increase* register pressure though, e.g. sinking
// "x = y + z" down if it kills y and z would increase the live ranges of y
// and z and only shrink the live range of x.

// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
BasicBlock *SuccToSinkTo = 0;

// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
// Look at all the postdominators and see if we can sink it in one.
DomTreeNode *DTN = DT->getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
I != E && SuccToSinkTo == 0; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
IsAcceptableTarget(Inst, Candidate))
SuccToSinkTo = Candidate;
}

// If no suitable postdominator was found, look at all the successors and
// If no suitable postdominator was found, look at all the successors and
// decide which one we should sink to, if any.
for (succ_iterator I = succ_begin(Inst->getParent()),
for (succ_iterator I = succ_begin(Inst->getParent()),
E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
if (IsAcceptableTarget(Inst, *I))
SuccToSinkTo = *I;
}

// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
return false;

DEBUG(dbgs() << "Sink" << *Inst << " (";
WriteAsOperand(dbgs(), Inst->getParent(), false);
WriteAsOperand(dbgs(), Inst->getParent(), false);
dbgs() << " -> ";
WriteAsOperand(dbgs(), SuccToSinkTo, false);
WriteAsOperand(dbgs(), SuccToSinkTo, false);
dbgs() << ")\n");

// Move the instruction.
Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
return true;
@ -172,7 +172,7 @@ bool TailCallElim::runOnFunction(Function &F) {
FunctionContainsEscapingAllocas |=
CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
}

/// FIXME: The code generator produces really bad code when an 'escaping
/// alloca' is changed from being a static alloca to being a dynamic alloca.
/// Until this is resolved, disable this transformation if that would ever
@ -234,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects())  // This also handles volatile loads.
return false;

if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
@ -364,7 +364,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,

if (&BB->front() == TI) // Make sure there is something before the terminator.
return 0;

// Scan backwards from the return, checking to see if there is a tail call in
// this block.  If so, set CI to it.
CallInst *CI = 0;
@ -388,7 +388,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// double fabs(double f) { return __builtin_fabs(f); }  // a 'fabs' call
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
FirstNonDbg(llvm::next(BB->begin())) == TI &&
callIsSmall(CI)) {
@ -432,7 +432,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock::iterator BBI = CI;
for (++BBI; &*BBI != Ret; ++BBI) {
if (CanMoveAboveCall(BBI, CI)) continue;

// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
// using accumulator recursion elimination.  Check to see if this is the
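
The TailCallElim hunks above turn self-recursive tail calls into loops, and the final comment refers to accumulator recursion elimination, where an associative and commutative operation performed after the recursive call is folded into an accumulator argument so the call becomes a true tail call. A hand-written illustration of that rewrite in plain C++ (not the pass's output) follows.

  #include <cstdio>

  // Not a tail call: the multiply happens after the recursive call returns.
  static unsigned long factorial(unsigned n) {
    if (n <= 1) return 1;
    return n * factorial(n - 1);
  }

  // Accumulator form: the recursion is now a genuine tail call...
  static unsigned long factorialAcc(unsigned n, unsigned long acc) {
    if (n <= 1) return acc;
    return factorialAcc(n - 1, acc * n);
  }

  // ...which tail-recursion elimination can then turn into a plain loop.
  static unsigned long factorialLoop(unsigned n) {
    unsigned long acc = 1;
    while (n > 1) {
      acc *= n;
      --n;
    }
    return acc;
  }

  int main() {
    std::printf("%lu %lu %lu\n", factorial(10), factorialAcc(10, 1),
                factorialLoop(10));
    return 0;
  }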