From 83d63919bd990ce00f62e18114504b9e4a5cb35e Mon Sep 17 00:00:00 2001
From: Victor Hernandez
Date: Fri, 18 Sep 2009 22:35:49 +0000
Subject: [PATCH] Enhance transform passes so that they apply the same
 transforms to malloc calls as to MallocInst.

Reviewed by Dan Gohman.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82300 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/FunctionAttrs.cpp       |  12 +-
 lib/Transforms/IPO/GlobalOpt.cpp           | 425 +++++++++++++++++-
 lib/Transforms/Scalar/GVN.cpp              |  11 +-
 .../Scalar/InstructionCombining.cpp        |  62 ++-
 lib/Transforms/Scalar/Reassociate.cpp      |   3 +-
 lib/Transforms/Scalar/SCCP.cpp             |   8 +-
 lib/Transforms/Scalar/SimplifyLibCalls.cpp |   8 +-
 lib/Transforms/Scalar/TailDuplication.cpp  |   3 +-
 lib/Transforms/Utils/InlineCost.cpp        |   3 +-
 9 files changed, 512 insertions(+), 23 deletions(-)

diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 26b4152291e..14c94da5595 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/UniqueVector.h"
@@ -152,8 +153,8 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode*> &SCC) {
         // Writes memory.  Just give up.
         return false;

-      if (isa<MallocInst>(I))
-        // MallocInst claims not to write memory!  PR3754.
+      if (isa<MallocInst>(I) || isMalloc(I))
+        // malloc claims not to write memory!  PR3754.
         return false;

       // If this instruction may read memory, remember that.
@@ -247,8 +248,11 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
     if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
       switch (RVI->getOpcode()) {
         // Extend the analysis by looking upwards.
-        case Instruction::GetElementPtr:
         case Instruction::BitCast:
+          if (isMalloc(RVI))
+            break;
+          // fall through
+        case Instruction::GetElementPtr:
           FlowsToReturn.insert(RVI->getOperand(0));
           continue;
         case Instruction::Select: {
@@ -267,6 +271,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
         case Instruction::Malloc:
           break;
         case Instruction::Call:
+          if (isMalloc(RVI))
+            break;
         case Instruction::Invoke: {
           CallSite CS(RVI);
           if (CS.paramHasAttr(0, Attribute::NoAlias))
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 63bc03d7872..950a23956fc 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Compiler.h"
@@ -939,6 +940,138 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
   return NewGV;
 }

+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc.  Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
+/// malloc into a global, and rewrite any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+                                                     CallInst *CI,
+                                                     BitCastInst *BCI,
+                                                     LLVMContext &Context,
+                                                     TargetData* TD) {
+  const Type *IntPtrTy = TD->getIntPtrType(Context);
+
+  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *CI);
+
+  ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI,
+                                                                Context, TD));
+  if (NElements->getZExtValue() != 1) {
+    // If we have an array allocation, transform it to a single element
+    // allocation to make the code below simpler.
+    Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
+                                 NElements->getZExtValue());
+    Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
+    Instruction* NewMI = cast<Instruction>(NewM);
+    Value* Indices[2];
+    Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
+    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
+                                              NewMI->getName()+".el0", CI);
+    BCI->replaceAllUsesWith(NewGEP);
+    BCI->eraseFromParent();
+    CI->eraseFromParent();
+    BCI = cast<BitCastInst>(NewMI);
+    CI = extractMallocCallFromBitCast(NewMI);
+  }
+
+  // Create the new global variable.  The contents of the malloc'd memory are
+  // undefined, so initialize with an undef value.
+  // FIXME: This new global should have the alignment returned by malloc.  Code
+  // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+  // this would only guarantee some lower alignment.
+  const Type *MAT = getMallocAllocatedType(CI);
+  Constant *Init = UndefValue::get(MAT);
+  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+                                             MAT, false,
+                                             GlobalValue::InternalLinkage, Init,
+                                             GV->getName()+".body",
+                                             GV,
+                                             GV->isThreadLocal());
+
+  // Anything that used the malloc now uses the global directly.
+  BCI->replaceAllUsesWith(NewGV);
+
+  Constant *RepValue = NewGV;
+  if (NewGV->getType() != GV->getType()->getElementType())
+    RepValue = ConstantExpr::getBitCast(RepValue,
+                                        GV->getType()->getElementType());
+
+  // If there is a comparison against null, we will insert a global bool to
+  // keep track of whether the global was initialized yet or not.
+  GlobalVariable *InitBool =
+    new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+                       GlobalValue::InternalLinkage,
+                       ConstantInt::getFalse(Context), GV->getName()+".init",
+                       GV->isThreadLocal());
+  bool InitBoolUsed = false;
+
+  // Loop over all uses of GV, processing them in turn.
+  std::vector<StoreInst*> Stores;
+  while (!GV->use_empty())
+    if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
+      while (!LI->use_empty()) {
+        Use &LoadUse = LI->use_begin().getUse();
+        if (!isa<ICmpInst>(LoadUse.getUser()))
+          LoadUse = RepValue;
+        else {
+          ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+          // Replace the cmp X, 0 with a use of the bool value.
+          Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+          InitBoolUsed = true;
+          switch (ICI->getPredicate()) {
+          default: llvm_unreachable("Unknown ICmp Predicate!");
+          case ICmpInst::ICMP_ULT:
+          case ICmpInst::ICMP_SLT:
+            LV = ConstantInt::getFalse(Context);  // X < null -> always false
+            break;
+          case ICmpInst::ICMP_ULE:
+          case ICmpInst::ICMP_SLE:
+          case ICmpInst::ICMP_EQ:
+            LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+            break;
+          case ICmpInst::ICMP_NE:
+          case ICmpInst::ICMP_UGE:
+          case ICmpInst::ICMP_SGE:
+          case ICmpInst::ICMP_UGT:
+          case ICmpInst::ICMP_SGT:
+            break;  // no change.
+          }
+          ICI->replaceAllUsesWith(LV);
+          ICI->eraseFromParent();
+        }
+      }
+      LI->eraseFromParent();
+    } else {
+      StoreInst *SI = cast<StoreInst>(GV->use_back());
+      // The global is initialized when the store to it occurs.
+      new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
+      SI->eraseFromParent();
+    }
+
+  // If the initialization boolean was used, insert it, otherwise delete it.
+  if (!InitBoolUsed) {
+    while (!InitBool->use_empty())  // Delete initializations
+      cast<StoreInst>(InitBool->use_back())->eraseFromParent();
+    delete InitBool;
+  } else
+    GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+  // Now the GV is dead, nuke it and the malloc.
+  GV->eraseFromParent();
+  BCI->eraseFromParent();
+  CI->eraseFromParent();
+
+  // To further other optimizations, loop over all users of NewGV and try to
+  // constant prop them.  This will promote GEP instructions with constant
+  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+  ConstantPropUsersOf(NewGV, Context);
+  if (RepValue != NewGV)
+    ConstantPropUsersOf(RepValue, Context);
+
+  return NewGV;
+}
+
 /// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
 /// to make sure that there are no complex uses of V.  We permit simple things
 /// like dereferencing the pointer, but not storing through the address, unless
@@ -1086,7 +1219,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
 /// GV are simple enough to perform HeapSRA, return true.
 static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
-                                                    MallocInst *MI) {
+                                                    Instruction *StoredVal) {
   SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
   SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
@@ -1110,7 +1243,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
       Value *InVal = PN->getIncomingValue(op);

       // PHI of the stored value itself is ok.
-      if (InVal == MI) continue;
+      if (InVal == StoredVal) continue;

       if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
         // One of the PHIs in our set is (optimistically) ok.
@@ -1444,6 +1577,191 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI,
   return cast<GlobalVariable>(FieldGlobals[0]);
 }

+/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
+                                            CallInst *CI, BitCastInst* BCI,
+                                            LLVMContext &Context,
+                                            TargetData *TD) {
+  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC CALL = " << *CI
+               << "  BITCAST = " << *BCI << '\n');
+  const Type* MAT = getMallocAllocatedType(CI);
+  const StructType *STy = cast<StructType>(MAT);
+
+  // There is guaranteed to be at least one use of the malloc (storing
+  // it into GV).  If there are other uses, change them to be uses of
+  // the global to simplify later code.  This also deletes the store
+  // into GV.
+  ReplaceUsesOfMallocWithGlobal(BCI, GV);
+
+  // Okay, at this point, there are no users of the malloc.  Insert N
+  // new mallocs at the same place as CI, and N globals.
+  std::vector<Value*> FieldGlobals;
+  std::vector<Value*> FieldMallocs;
+
+  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e; ++FieldNo) {
+    const Type *FieldTy = STy->getElementType(FieldNo);
+    const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+
+    GlobalVariable *NGV =
+      new GlobalVariable(*GV->getParent(),
+                         PFieldTy, false, GlobalValue::InternalLinkage,
+                         Constant::getNullValue(PFieldTy),
+                         GV->getName() + ".f" + Twine(FieldNo), GV,
+                         GV->isThreadLocal());
+    FieldGlobals.push_back(NGV);
+
+    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy,
+                                        getMallocArraySize(CI, Context, TD),
+                                        BCI->getName() + ".f" + Twine(FieldNo));
+    FieldMallocs.push_back(NMI);
+    new StoreInst(NMI, NGV, BCI);
+  }
+
+  // The tricky aspect of this transformation is handling the case when malloc
+  // fails.  In the original code, malloc failing would set the result pointer
+  // of malloc to null.  In this case, some mallocs could succeed and others
+  // could fail.  As such, we emit code that looks like this:
+  //    F0 = malloc(field0)
+  //    F1 = malloc(field1)
+  //    F2 = malloc(field2)
+  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
+  //      if (F0) { free(F0); F0 = 0; }
+  //      if (F1) { free(F1); F1 = 0; }
+  //      if (F2) { free(F2); F2 = 0; }
+  //    }
+  Value *RunningOr = 0;
+  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+    Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+                               Constant::getNullValue(FieldMallocs[i]->getType()),
+                               "isnull");
+    if (!RunningOr)
+      RunningOr = Cond;  // First seteq
+    else
+      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
+  }
+
+  // Split the basic block at the old malloc.
+  BasicBlock *OrigBB = BCI->getParent();
+  BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
+
+  // Create the block to check the first condition.  Put all these blocks at the
+  // end of the function as they are unlikely to be executed.
+  BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
+                                                OrigBB->getParent());
+
+  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+  // branch on RunningOr.
+  OrigBB->getTerminator()->eraseFromParent();
+  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+  // Within the NullPtrBlock, we need to emit a comparison and branch for each
+  // pointer, because some may be null while others are not.
+  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+                              Constant::getNullValue(GVVal->getType()),
+                              "tmp");
+    BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
+                                               OrigBB->getParent());
+    BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
+                                               OrigBB->getParent());
+    BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+
+    // Fill in FreeBlock.
+    new FreeInst(GVVal, FreeBlock);
+    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+                  FreeBlock);
+    BranchInst::Create(NextBlock, FreeBlock);
+
+    NullPtrBlock = NextBlock;
+  }
+
+  BranchInst::Create(ContBB, NullPtrBlock);
+
+  // CI and BCI are no longer needed, remove them.
+  BCI->eraseFromParent();
+  CI->eraseFromParent();
+
+  /// InsertedScalarizedValues - As we process loads, if we can't immediately
+  /// update all uses of the load, keep track of what scalarized loads are
+  /// inserted for a given load.
+  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+  InsertedScalarizedValues[GV] = FieldGlobals;
+
+  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+  // Okay, the malloc site is completely handled.  All of the uses of GV are now
+  // loads, and all uses of those loads are simple.  Rewrite them to use loads
+  // of the per-field globals instead.
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+    Instruction *User = cast<Instruction>(*UI++);
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
+                                   Context);
+      continue;
+    }
+
+    // Must be a store of null.
+    StoreInst *SI = cast<StoreInst>(User);
+    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+           "Unexpected heap-sra user!");
+
+    // Insert a store of null into each global.
+    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+      Constant *Null = Constant::getNullValue(PT->getElementType());
+      new StoreInst(Null, FieldGlobals[i], SI);
+    }
+    // Erase the original store.
+    SI->eraseFromParent();
+  }
+
+  // While we have PHIs that are interesting to rewrite, do it.
+  while (!PHIsToRewrite.empty()) {
+    PHINode *PN = PHIsToRewrite.back().first;
+    unsigned FieldNo = PHIsToRewrite.back().second;
+    PHIsToRewrite.pop_back();
+    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 && "Already processed this phi");
+
+    // Add all the incoming values.  This can materialize more phis.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *InVal = PN->getIncomingValue(i);
+      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+                               PHIsToRewrite, Context);
+      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+    }
+  }
+
+  // Drop all inter-phi links and any loads that made it this far.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->dropAllReferences();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->dropAllReferences();
+  }
+
+  // Delete all the phis and loads now that inter-references are dead.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->eraseFromParent();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->eraseFromParent();
+  }
+
+  // The old global is now dead, remove it.
+  GV->eraseFromParent();
+
+  ++NumHeapSRA;
+  return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
 /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
 /// pointer global variable with a single value stored to it that is a malloc
 /// or cast of malloc.
@@ -1533,6 +1851,99 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
   return false;
 }

+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored to it that is a malloc
+/// or cast of malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+                                               CallInst *CI,
+                                               BitCastInst *BCI,
+                                               Module::global_iterator &GVI,
+                                               TargetData *TD,
+                                               LLVMContext &Context) {
+  // If we can't figure out the type being malloced, then we can't optimize.
+  const Type *AllocTy = getMallocAllocatedType(CI);
+  assert(AllocTy);
+
+  // If this is a malloc of an abstract type, don't touch it.
+  if (!AllocTy->isSized())
+    return false;
+
+  // We can't optimize this global unless all uses of it are *known* to be
+  // of the malloc value, not of the null initializer value (consider a use
+  // that compares the global's value against zero to see if the malloc has
+  // been reached).  To do this, we check to see if all uses of the global
+  // would trap if the global were null: this proves that they must all
+  // happen after the malloc.
+  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+    return false;
+
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals.  This allows the
+  // malloc to be stored into the specified global, loaded, setcc'd, and
+  // GEP'd.  These are all things we could transform to using the global
+  // for.
+  {
+    SmallPtrSet<const PHINode*, 8> PHIs;
+    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+      return false;
+  }
+
+  // If we have a global that is only initialized with a fixed size malloc,
+  // transform the program to use global memory instead of malloc'd memory.
+  // This eliminates dynamic allocation, avoids an indirection accessing the
+  // data, and exposes the resultant global to further GlobalOpt.
+  if (ConstantInt *NElements =
+        dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) {
+    // Restrict this transformation to only working on small allocations
+    // (2048 bytes currently), as we don't want to introduce a 16M global or
+    // something.
+    if (TD &&
+        NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD);
+      return true;
+    }
+  }
+
+  // If the allocation is an array of structures, consider transforming this
+  // into multiple malloc'd arrays, one for each field.  This is basically
+  // SRoA for malloc'd memory.
+
+  // If this is an allocation of a fixed size array of structs, analyze as a
+  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
+  if (!isArrayMalloc(CI, Context, TD))
+    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+      AllocTy = AT->getElementType();
+
+  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
+    // If the structure has an unreasonable number of fields, leave it
+    // alone.
+    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
+
+      // If this is a fixed size array, transform the Malloc to be an alloc of
+      // structs.  malloc [100 x struct],1 -> malloc struct, 100
+      if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+        Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
+                                              AT->getNumElements());
+        Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
+                                              AllocSTy, NumElements,
+                                              BCI->getName());
+        Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
+        BCI->replaceAllUsesWith(Cast);
+        BCI->eraseFromParent();
+        CI->eraseFromParent();
+        BCI = cast<BitCastInst>(NewMI);
+        CI = extractMallocCallFromBitCast(NewMI);
+      }
+
+      GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
 // that only one value (besides its initializer) is ever stored to the global.
 static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
@@ -1558,6 +1969,16 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
   } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
     if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context))
       return true;
+  } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
+    if (getMallocAllocatedType(CI)) {
+      BitCastInst* BCI = NULL;
+      for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+           UI != E; )
+        BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++));
+      if (BCI &&
+          TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context))
+        return true;
+    }
   }
 }

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 36c90f519f2..9f72659d215 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -32,6 +32,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
@@ -982,7 +983,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     Instruction *DepInst = DepInfo.getInst();

     // Loading the allocation -> undef.
-    if (isa<AllocationInst>(DepInst)) {
+    if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
       ValuesPerBlock.push_back(std::make_pair(DepBB,
                                               UndefValue::get(LI->getType())));
       continue;
@@ -1270,7 +1271,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   // If this load really doesn't depend on anything, then we must be loading an
   // undef value.  This can happen when loading for a fresh allocation with no
   // intervening stores, for example.
-  if (isa<AllocationInst>(DepInst)) {
+  if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     toErase.push_back(L);
     NumGVNLoad++;
@@ -1393,7 +1394,7 @@ bool GVN::processInstruction(Instruction *I,

   // Allocations are always uniquely numbered, so we can save time and memory
   // by fast failing them.
-  } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
+  } else if (isa<AllocationInst>(I) || isMalloc(I) || isa<TerminatorInst>(I)) {
     localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
     return false;
   }
@@ -1558,8 +1559,8 @@ bool GVN::performPRE(Function& F) {
          BE = CurrentBlock->end(); BI != BE; ) {
       Instruction *CurInst = BI++;

-      if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
-          isa<PHINode>(CurInst) ||
+      if (isa<AllocationInst>(CurInst) || isMalloc(CurInst) ||
+          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
           (CurInst->getType() == Type::getVoidTy(F.getContext())) ||
           CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
           isa<DbgInfoIntrinsic>(CurInst))
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index e6bd1929767..8d61fce4b43 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -42,6 +42,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Operator.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -89,6 +90,7 @@ namespace {
     /// Add - Add the specified instruction to the worklist if it isn't already
     /// in it.
     void Add(Instruction *I) {
+      DEBUG(errs() << "IC: ADD: " << *I << '\n');
       if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second)
         Worklist.push_back(I);
     }
@@ -326,7 +328,7 @@ namespace {
     // instruction.  Instead, visit methods should return the value returned by
     // this function.
     Instruction *EraseInstFromFunction(Instruction &I) {
-      DEBUG(errs() << "IC: erase " << I << '\n');
+      DEBUG(errs() << "IC: ERASE " << I << '\n');

       assert(I.use_empty() && "Cannot erase instruction that is used!");
       // Make sure that we reprocess all operands now that we reduced their
@@ -5891,9 +5893,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {

     // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
     // addresses never equal each other!  We already know that Op0 != Op1.
-    if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
+    if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || isMalloc(Op0) ||
          isa<ConstantPointerNull>(Op0)) &&
-        (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
+        (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || isMalloc(Op1) ||
          isa<ConstantPointerNull>(Op1)))
       return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
                                                      !I.isTrueWhenEqual()));
@@ -6231,8 +6233,33 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // can assume it is successful and remove the malloc.
         if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
           Worklist.Add(LHSI);
-          return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
-                                                         !I.isTrueWhenEqual()));
+          return ReplaceInstUsesWith(I,
+                                     ConstantInt::get(Type::getInt1Ty(*Context),
+                                                      !I.isTrueWhenEqual()));
+        }
+        break;
+      case Instruction::Call:
+        // If we have (malloc != null), and if the malloc has a single use, we
+        // can assume it is successful and remove the malloc.
+        if (isMalloc(LHSI) && LHSI->hasOneUse() &&
+            isa<ConstantPointerNull>(RHSC)) {
+          Worklist.Add(LHSI);
+          return ReplaceInstUsesWith(I,
+                                     ConstantInt::get(Type::getInt1Ty(*Context),
+                                                      !I.isTrueWhenEqual()));
+        }
+        break;
+      case Instruction::BitCast:
+        // If we have (malloc != null), and if the malloc has a single use, we
+        // can assume it is successful and remove the malloc.
+        CallInst* CI = extractMallocCallFromBitCast(LHSI);
+        if (CI && CI->hasOneUse() && LHSI->hasOneUse() &&
+            isa<ConstantPointerNull>(RHSC)) {
+          Worklist.Add(LHSI);
+          Worklist.Add(CI);
+          return ReplaceInstUsesWith(I,
+                                     ConstantInt::get(Type::getInt1Ty(*Context),
+                                                      !I.isTrueWhenEqual()));
         }
         break;
       }
@@ -8784,8 +8811,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
     return 0;

-  // If we are casting a malloc or alloca to a pointer to a type of the same
+  // If we are casting an alloca to a pointer to a type of the same
   // size, rewrite the allocation instruction to allocate the "right" type.
+  // There is no need to modify malloc calls because it is their bitcast that
+  // needs to be cleaned up.
   if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
     if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
       return V;
@@ -9459,6 +9488,7 @@ static unsigned EnforceKnownAlignment(Value *V,
         Align = PrefAlign;
       }
     }
+    // No alignment changes are possible for malloc calls
   }

   return Align;
@@ -9796,7 +9826,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           TerminatorInst *TI = II->getParent()->getTerminator();
           bool CannotRemove = false;
           for (++BI; &*BI != TI; ++BI) {
-            if (isa<AllocaInst>(BI)) {
+            if (isa<AllocaInst>(BI) || isMalloc(BI)) {
               CannotRemove = true;
               break;
             }
@@ -11060,7 +11090,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (Offset == 0) {
         // If the bitcast is of an allocation, and the allocation will be
         // converted to match the type of the cast, don't touch this.
-        if (isa<AllocationInst>(BCI->getOperand(0))) {
+        if (isa<AllocationInst>(BCI->getOperand(0)) ||
+            isMalloc(BCI->getOperand(0))) {
           // See if the bitcast simplifies, if so, don't nuke this GEP yet.
           if (Instruction *I = visitBitCast(*BCI)) {
             if (I != BCI) {
@@ -11191,6 +11222,21 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
     EraseInstFromFunction(FI);
     return EraseInstFromFunction(*MI);
   }
+  if (isMalloc(Op)) {
+    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
+      if (Op->hasOneUse() && CI->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        EraseInstFromFunction(*CI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
+    } else {
+      // Op is a call to malloc
+      if (Op->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
+    }
+  }

   return 0;
 }
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index e6ffac251b7..9160654c3d8 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -29,6 +29,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
@@ -121,7 +122,7 @@ static bool isUnmovableInstruction(Instruction *I) {
   if (I->getOpcode() == Instruction::PHI ||
       I->getOpcode() == Instruction::Alloca ||
       I->getOpcode() == Instruction::Load ||
-      I->getOpcode() == Instruction::Malloc ||
+      I->getOpcode() == Instruction::Malloc || isMalloc(I) ||
       I->getOpcode() == Instruction::Invoke ||
       (I->getOpcode() == Instruction::Call &&
        !isa<DbgInfoIntrinsic>(I)) ||
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index e1f0ba535cd..f0b5d7702e3 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -30,6 +30,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CallSite.h"
@@ -400,7 +401,12 @@ private:
   void visitStoreInst     (Instruction &I);
   void visitLoadInst      (LoadInst &I);
   void visitGetElementPtrInst(GetElementPtrInst &I);
-  void visitCallInst      (CallInst &I) { visitCallSite(CallSite::get(&I)); }
+  void visitCallInst      (CallInst &I) {
+    if (isMalloc(&I))
+      markOverdefined(&I);
+    else
+      visitCallSite(CallSite::get(&I));
+  }
   void visitInvokeInst    (InvokeInst &II) {
     visitCallSite(CallSite::get(&II));
     visitTerminatorInst(II);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 419d66f7af4..96ea0e45adf 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -1853,7 +1853,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
       }
       break;
     case 'm':
-      if (Name == "memcmp") {
+      if (Name == "malloc") {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getReturnType()))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+      } else if (Name == "memcmp") {
         if (FTy->getNumParams() != 3 ||
             !isa<PointerType>(FTy->getParamType(0)) ||
             !isa<PointerType>(FTy->getParamType(1)))
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 4bad7081166..8345175e137 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Type.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -129,7 +130,7 @@ bool
 TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
     if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;

     // Also alloca and malloc.
-    if (isa<AllocationInst>(I)) return false;
+    if (isa<AllocationInst>(I) || isMalloc(I)) return false;

     // Some vector instructions can expand into a number of instructions.
     if (isa<ExtractElementInst>(I) || isa<InsertElementInst>(I) ||
diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp
index fe4d060e9ac..7bf23661587 100644
--- a/lib/Transforms/Utils/InlineCost.cpp
+++ b/lib/Transforms/Utils/InlineCost.cpp
@@ -13,6 +13,7 @@

 #include "llvm/Transforms/Utils/InlineCost.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/CallingConv.h"
 #include "llvm/IntrinsicInst.h"
@@ -51,7 +52,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
       // Unfortunately, we don't know the pointer that may get propagated here,
       // so we can't make this decision.
       if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
-          isa<AllocaInst>(Inst))
+          isa<AllocaInst>(Inst) || isMalloc(&Inst))
         continue;

       bool AllOperandsConstant = true;
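
For illustration only (not part of the patch): a minimal IR sketch of the store-of-malloc
pattern that the new CallInst overload of TryToOptimizeStoreOfMallocToGlobal handles, where
the allocation is a malloc call plus bitcast rather than a MallocInst. It assumes a 32-bit
target where malloc takes an i32; all names are hypothetical.

    ; Before: @G is initialized exactly once with a fixed-size malloc.
    declare i8* @malloc(i32)

    @G = internal global i32* null

    define void @init() {
    entry:
      %m = call i8* @malloc(i32 4)     ; recognized via extractMallocCall
      %p = bitcast i8* %m to i32*      ; getMallocAllocatedType(CI) -> i32
      store i32* %p, i32** @G
      ret void
    }

    ; After OptimizeGlobalAddressOfMalloc (roughly): the allocation becomes
    ;   @G.body = internal global i32 undef
    ; loads of @G are rewritten to use @G.body, and an i1 flag @G.init is
    ; introduced only if the loaded pointer was compared against null.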