diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 27862122601..f705d2d50c5 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -36,6 +36,7 @@ namespace {
   Statistic<> NumMarked ("globalopt", "Number of globals marked constant");
   Statistic<> NumSRA ("globalopt", "Number of aggregate globals broken "
                       "into scalars");
+  Statistic<> NumHeapSRA ("globalopt", "Number of heap objects SRA'd");
   Statistic<> NumSubstitute("globalopt",
                 "Number of globals with initializers stored into them");
   Statistic<> NumDeleted ("globalopt", "Number of globals deleted");
@@ -794,9 +795,235 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
       return false;
     }
   return true;
-
 }
 
+/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
+/// somewhere.  Transform all uses of the allocation into loads from the
+/// global and uses of the resultant pointer.  Further, delete the store into
+/// GV.  This assumes that these values pass the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+                                          GlobalVariable *GV) {
+  while (!Alloc->use_empty()) {
+    Instruction *U = Alloc->use_back();
+    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      // If this is the store of the allocation into the global, remove it.
+      if (SI->getOperand(1) == GV) {
+        SI->eraseFromParent();
+        continue;
+      }
+    }
+
+    // Insert a load from the global, and use it instead of the malloc.
+    Value *NL = new LoadInst(GV, GV->getName()+".val", U);
+    U->replaceUsesOfWith(Alloc, NL);
+  }
+}
+
+/// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
+/// GV are simple enough to perform HeapSRA, return true.
+static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) {
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
+       ++UI)
+    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+      // We permit two users of the load: setcc comparing against the null
+      // pointer, and a getelementptr of a specific form.
+      for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end(); UI != E;
+           ++UI) {
+        // Comparison against null is ok.
+        if (SetCondInst *SCI = dyn_cast<SetCondInst>(*UI)) {
+          if (!isa<ConstantPointerNull>(SCI->getOperand(1)))
+            return false;
+          continue;
+        }
+
+        // getelementptr is also ok, but only a simple form.
+        GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI);
+        if (!GEPI) return false;
+
+        // Must index into the array and into the struct.
+        if (GEPI->getNumOperands() < 3)
+          return false;
+
+        // Otherwise the GEP is ok.
+        continue;
+      }
+    }
+  return true;
+}
+
+/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
+/// is a value loaded from the global.  Eliminate all uses of Ptr, making them
+/// use FieldGlobals instead.  All uses of loaded values satisfy
+/// GlobalLoadUsesSimpleEnoughForHeapSRA.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Ptr,
+                          const std::vector<GlobalVariable*> &FieldGlobals) {
+  std::vector<Value*> InsertedLoadsForPtr;
+  // Cache of per-field loads, indexed by field number; grown lazily below.
+  while (!Ptr->use_empty()) {
+    Instruction *User = Ptr->use_back();
+
+    // If this is a comparison against null, handle it.
+    if (SetCondInst *SCI = dyn_cast<SetCondInst>(User)) {
+      assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+      // If we have a setcc of the loaded pointer, we can use a setcc of any
+      // field.
+      Value *NPtr;
+      if (InsertedLoadsForPtr.empty()) {
+        NPtr = new LoadInst(FieldGlobals[0], Ptr->getName()+".f0", Ptr);
+        InsertedLoadsForPtr.push_back(NPtr);  // Cache the new load, not Ptr.
+      } else {
+        NPtr = InsertedLoadsForPtr.back();
+      }
+
+      Value *New = new SetCondInst(SCI->getOpcode(), NPtr,
+                                   Constant::getNullValue(NPtr->getType()),
+                                   SCI->getName(), SCI);
+      SCI->replaceAllUsesWith(New);
+      SCI->eraseFromParent();
+      continue;
+    }
+
+    // Otherwise, this should be: 'getelementptr Ptr, Idx, uint FieldNo ...'
+    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
+    assert(GEPI->getNumOperands() >= 3 && isa<ConstantUInt>(GEPI->getOperand(2))
+           && "Unexpected GEPI!");
+
+    // Load the pointer for this field.
+    unsigned FieldNo = cast<ConstantUInt>(GEPI->getOperand(2))->getValue();
+    if (InsertedLoadsForPtr.size() <= FieldNo)
+      InsertedLoadsForPtr.resize(FieldNo+1);
+    if (InsertedLoadsForPtr[FieldNo] == 0)
+      InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo],
+                                                  Ptr->getName()+".f" +
+                                                  utostr(FieldNo), Ptr);
+    Value *NewPtr = InsertedLoadsForPtr[FieldNo];
+
+    // Create the new GEP idx vector.
+    std::vector<Value*> GEPIdx;
+    GEPIdx.push_back(GEPI->getOperand(1));
+    GEPIdx.insert(GEPIdx.end(), GEPI->op_begin()+3, GEPI->op_end());
+
+    Value *NGEPI = new GetElementPtrInst(NewPtr, GEPIdx, GEPI->getName(), GEPI);
+    GEPI->replaceAllUsesWith(NGEPI);
+    GEPI->eraseFromParent();
+  }
+}
+
+/// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
+  /*DEBUG*/(std::cerr << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI);
+  const StructType *STy = cast<StructType>(MI->getAllocatedType());
+
+  // There is guaranteed to be at least one use of the malloc (storing
+  // it into GV).  If there are other uses, change them to be uses of
+  // the global to simplify later code.  This also deletes the store
+  // into GV.
+  ReplaceUsesOfMallocWithGlobal(MI, GV);
+
+  // Okay, at this point, there are no users of the malloc.  Insert N
+  // new mallocs at the same place as MI, and N globals.
+  std::vector<GlobalVariable*> FieldGlobals;
+  std::vector<MallocInst*> FieldMallocs;
+
+  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+    const Type *FieldTy = STy->getElementType(FieldNo);
+    const Type *PFieldTy = PointerType::get(FieldTy);
+
+    GlobalVariable *NGV =
+      new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage,
+                         Constant::getNullValue(PFieldTy),
+                         GV->getName() + ".f" + utostr(FieldNo), GV);
+    FieldGlobals.push_back(NGV);
+
+    MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
+                                     MI->getName() + ".f" + utostr(FieldNo),MI);
+    FieldMallocs.push_back(NMI);
+    new StoreInst(NMI, NGV, MI);
+  }
+
+  // The tricky aspect of this transformation is handling the case when malloc
+  // fails.  In the original code, malloc failing would set the result pointer
+  // of malloc to null.  In this case, some mallocs could succeed and others
+  // could fail.  As such, we emit code that looks like this:
+  //    F0 = malloc(field0)
+  //    F1 = malloc(field1)
+  //    F2 = malloc(field2)
+  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
+  //      if (F0) { free(F0); F0 = 0; }
+  //      if (F1) { free(F1); F1 = 0; }
+  //      if (F2) { free(F2); F2 = 0; }
+  //    }
+  Value *RunningOr = 0;
+  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+    Value *Cond = new SetCondInst(Instruction::SetEQ, FieldMallocs[i],
+                            Constant::getNullValue(FieldMallocs[i]->getType()),
+                                  "isnull", MI);
+    if (!RunningOr)
+      RunningOr = Cond;   // First seteq
+    else
+      RunningOr = BinaryOperator::createOr(RunningOr, Cond, "tmp", MI);
+  }
+
+  // Split the basic block at the old malloc.
+  BasicBlock *OrigBB = MI->getParent();
+  BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont");
+
+  // Create the block to check the first condition.  Put all these blocks at the
+  // end of the function as they are unlikely to be executed.
+  BasicBlock *NullPtrBlock = new BasicBlock("malloc_ret_null",
+                                            OrigBB->getParent());
+
+  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+  // branch on RunningOr.
+  OrigBB->getTerminator()->eraseFromParent();
+  new BranchInst(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+  // Within the NullPtrBlock, we need to emit a comparison and branch for each
+  // pointer, because some may be null while others are not.
+  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+    Value *Cmp = new SetCondInst(Instruction::SetNE, GVVal,
+                                 Constant::getNullValue(GVVal->getType()),
+                                 "tmp", NullPtrBlock);
+    BasicBlock *FreeBlock = new BasicBlock("free_it", OrigBB->getParent());
+    BasicBlock *NextBlock = new BasicBlock("next", OrigBB->getParent());
+    new BranchInst(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+
+    // Fill in FreeBlock.
+    new FreeInst(GVVal, FreeBlock);
+    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+                  FreeBlock);
+    new BranchInst(NextBlock, FreeBlock);
+
+    NullPtrBlock = NextBlock;
+  }
+
+  new BranchInst(ContBB, NullPtrBlock);
+
+
+  // MI is no longer needed, remove it.
+  MI->eraseFromParent();
+
+
+  // Okay, the malloc site is completely handled.  All of the uses of GV are now
+  // loads, and all uses of those loads are simple.  Rewrite them to use loads
+  // of the per-field globals instead.
+  while (!GV->use_empty()) {
+    LoadInst *LI = cast<LoadInst>(GV->use_back());
+    RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals);
+    LI->eraseFromParent();
+  }
+
+  // The old global is now dead, remove it.
+  GV->eraseFromParent();
+
+  ++NumHeapSRA;
+  return FieldGlobals[0];
+}
+
+
 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
 // that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, @@ -835,23 +1062,52 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, if (!MI->getAllocatedType()->isSized()) return false; + // We can't optimize this global unless all uses of it are *known* to be + // of the malloc value, not of the null initializer value (consider a use + // that compares the global's value against zero to see if the malloc has + // been reached). To do this, we check to see if all uses of the global + // would trap if the global were null: this proves that they must all + // happen after the malloc. + if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) + return false; + + // We can't optimize this if the malloc itself is used in a complex way, + // for example, being stored into multiple globals. This allows the + // malloc to be stored into the specified global, loaded, setcc'd, and + // GEP'd. These are all things we could transform to using the global + // for. + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV)) + return false; + + // If we have a global that is only initialized with a fixed size malloc, - // and if all users of the malloc trap, and if the malloc'd address is not - // put anywhere else, transform the program to use global memory instead - // of malloc'd memory. This eliminates dynamic allocation (good) and - // exposes the resultant global to further GlobalOpt (even better). Note - // that we restrict this transformation to only working on small - // allocations (2048 bytes currently), as we don't want to introduce a 16M - // global or something. + // transform the program to use global memory instead of malloc'd memory. + // This eliminates dynamic allocation, avoids an indirection accessing the + // data, and exposes the resultant global to further GlobalOpt. 
if (ConstantInt *NElements = dyn_cast(MI->getArraySize())) { + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. if (NElements->getRawValue()* - TD.getTypeSize(MI->getAllocatedType()) < 2048 && - AllUsesOfLoadedValueWillTrapIfNull(GV) && - ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV)) { + TD.getTypeSize(MI->getAllocatedType()) < 2048) { GVI = OptimizeGlobalAddressOfMalloc(GV, MI); return true; } } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + if (const StructType *AllocTy = + dyn_cast(MI->getAllocatedType())) { + // If the structure has an unreasonable number of fields, leave it + // alone. + if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 && + GlobalLoadUsesSimpleEnoughForHeapSRA(GV)) { + GVI = PerformHeapAllocSRoA(GV, MI); + return true; + } + } } }