diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 178d55305e2..b11674898fb 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -297,10 +297,10 @@ public: /// \brief Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. - virtual unsigned getIntImmCost(unsigned Opcode, const APInt &Imm, - Type *Ty) const; - virtual unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm, + virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const; + virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const; /// @} /// \name Vector Target Information diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 0dcdd12a409..75d053c6891 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -148,14 +148,14 @@ unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { return PrevTTI->getIntImmCost(Imm, Ty); } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, const APInt &Imm, - Type *Ty) const { - return PrevTTI->getIntImmCost(Opcode, Imm, Ty); +unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx, + const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty); } -unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, const APInt &Imm, - Type *Ty) const { - return PrevTTI->getIntImmCost(IID, Imm, Ty); +unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty); } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { @@ -539,12 +539,12 @@ struct NoTTI final 
: ImmutablePass, TargetTransformInfo { return TCC_Basic; } - unsigned getIntImmCost(unsigned Opcode, const APInt &Imm, + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const override { return TCC_Free; } - unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm, + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const override { return TCC_Free; } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 1a0208c1a52..87a5dd6536b 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -103,9 +103,9 @@ public: unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; - unsigned getIntImmCost(unsigned Opcode, const APInt &Imm, + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const override; - unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm, + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const override; /// @} @@ -776,6 +776,9 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { if (BitSize == 0) return ~0U; + if (Imm == 0) + return TCC_Free; + if (Imm.getBitWidth() <= 64 && (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue()))) return TCC_Basic; @@ -783,7 +786,7 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { return 2 * TCC_Basic; } -unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm, +unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); @@ -791,7 +794,15 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm, if (BitSize == 0) return ~0U; + unsigned ImmIdx = ~0U; switch (Opcode) { + default: return TCC_Free; + case Instruction::GetElementPtr: + if (Idx != 0) + return TCC_Free; + case Instruction::Store: + ImmIdx = 0; + break; case Instruction::Add: case 
Instruction::Sub: case Instruction::Mul: @@ -806,28 +817,31 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm, case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: - if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) - return TCC_Free; - else - return X86TTI::getIntImmCost(Imm, Ty); + ImmIdx = 1; + break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::BitCast: + case Instruction::PHI: case Instruction::Call: case Instruction::Select: case Instruction::Ret: case Instruction::Load: - case Instruction::Store: - return X86TTI::getIntImmCost(Imm, Ty); + break; } - return TargetTransformInfo::getIntImmCost(Opcode, Imm, Ty); + + if ((Idx == ImmIdx) && + Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) + return TCC_Free; + + return X86TTI::getIntImmCost(Imm, Ty); } -unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm, - Type *Ty) const { +unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -835,21 +849,24 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm, return ~0U; switch (IID) { - default: return TargetTransformInfo::getIntImmCost(IID, Imm, Ty); + default: return TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: - if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) + if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) return TCC_Free; else return X86TTI::getIntImmCost(Imm, Ty); case Intrinsic::experimental_stackmap: + if (Idx < 2) + return TCC_Free; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - if 
(Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())) + if ((Idx < 4 ) || + (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TCC_Free; else return X86TTI::getIntImmCost(Imm, Ty); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 6cfcec547d0..016f7c17111 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -35,15 +35,14 @@ #define DEBUG_TYPE "consthoist" #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -51,42 +50,80 @@ using namespace llvm; STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); - namespace { -typedef SmallVector<User *, 4> ConstantUseListType; -struct ConstantCandidate { - unsigned CumulativeCost; - ConstantUseListType Uses; +struct ConstantUser; +struct RebasedConstantInfo; + +typedef SmallVector<ConstantUser, 8> ConstantUseListType; +typedef SmallVector<RebasedConstantInfo, 4> RebasedConstantListType; + +/// \brief Keeps track of the user of a constant and the operand index where the +/// constant is used. +struct ConstantUser { + Instruction *Inst; + unsigned OpndIdx; + + ConstantUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) { } }; +/// \brief Keeps track of a constant candidate and its usees. 
+struct ConstantCandidate { + ConstantUseListType Uses; + ConstantInt *ConstInt; + unsigned CumulativeCost; + + ConstantCandidate(ConstantInt *ConstInt) + : ConstInt(ConstInt), CumulativeCost(0) { } + + /// \brief Add the user to the use list and update the cost. + void addUser(Instruction *Inst, unsigned Idx, unsigned Cost) { + CumulativeCost += Cost; + Uses.push_back(ConstantUser(Inst, Idx)); + } +}; + +/// \brief This represents a constant that has been rebased with respect to a +/// base constant. The difference to the base constant is recorded in Offset. +struct RebasedConstantInfo { + ConstantUseListType Uses; + Constant *Offset; + mutable BasicBlock *IDom; + + RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset) + : Uses(std::move(Uses)), Offset(Offset), IDom(nullptr) { } +}; + +/// \brief A base constant and all its rebased constants. struct ConstantInfo { ConstantInt *BaseConstant; - struct RebasedConstantInfo { - ConstantInt *OriginalConstant; - Constant *Offset; - ConstantUseListType Uses; - }; - typedef SmallVector<RebasedConstantInfo, 4> RebasedConstantListType; RebasedConstantListType RebasedConstants; }; +/// \brief The constant hoisting pass. class ConstantHoisting : public FunctionPass { + typedef DenseMap<ConstantInt *, unsigned> ConstCandMapType; + typedef std::vector<ConstantCandidate> ConstCandVecType; + const TargetTransformInfo *TTI; DominatorTree *DT; + BasicBlock *Entry; - /// Keeps track of expensive constants found in the function. - typedef MapVector<ConstantInt *, ConstantCandidate> ConstantMapType; - ConstantMapType ConstantMap; + /// Keeps track of constant candidates found in the function. + ConstCandMapType ConstCandMap; + ConstCandVecType ConstCandVec; + + /// Keep track of cast instructions we already cloned. + SmallDenseMap<Instruction *, Instruction *> ClonedCastMap; /// These are the final constants we decided to hoist. 
- SmallVector<ConstantInfo, 4> Constants; + SmallVector<ConstantInfo, 8> ConstantVec; public: static char ID; // Pass identification, replacement for typeid - ConstantHoisting() : FunctionPass(ID), TTI(0) { + ConstantHoisting() : FunctionPass(ID), TTI(0), DT(0), Entry(0) { initializeConstantHoistingPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F) override; + bool runOnFunction(Function &Fn) override; const char *getPassName() const override { return "Constant Hoisting"; } @@ -97,19 +134,49 @@ public: } private: - void CollectConstant(User *U, unsigned Opcode, Intrinsic::ID IID, - ConstantInt *C); - void CollectConstants(Instruction *I); - void CollectConstants(Function &F); - void FindAndMakeBaseConstant(ConstantMapType::iterator S, - ConstantMapType::iterator E); - void FindBaseConstants(); - Instruction *FindConstantInsertionPoint(Function &F, - const ConstantInfo &CI) const; - void EmitBaseConstants(Function &F, User *U, Instruction *Base, - Constant *Offset, ConstantInt *OriginalConstant); - bool EmitBaseConstants(Function &F); - bool OptimizeConstants(Function &F); + /// \brief Initialize the pass. + void setup(Function &Fn) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TTI = &getAnalysis<TargetTransformInfo>(); + Entry = &Fn.getEntryBlock(); + } + + /// \brief Cleanup. + void cleanup() { + ConstantVec.clear(); + ClonedCastMap.clear(); + ConstCandVec.clear(); + ConstCandMap.clear(); + + TTI = nullptr; + DT = nullptr; + Entry = nullptr; + } + + /// \brief Find the common dominator of all uses and cache the result for + /// future lookup. 
+ BasicBlock *getIDom(const RebasedConstantInfo &RCI) const { + if (RCI.IDom) + return RCI.IDom; + RCI.IDom = findIDomOfAllUses(RCI.Uses); + assert(RCI.IDom && "Invalid IDom."); + return RCI.IDom; + } + + BasicBlock *findIDomOfAllUses(const ConstantUseListType &Uses) const; + Instruction *findMatInsertPt(Instruction *I, unsigned Idx = ~0U) const; + Instruction *findConstantInsertionPoint(const ConstantInfo &CI) const; + void collectConstantCandidates(Instruction *I, unsigned Idx, ConstantInt *C); + void collectConstantCandidates(Instruction *I); + void collectConstantCandidates(Function &Fn); + void findAndMakeBaseConstant(ConstCandVecType::iterator S, + ConstCandVecType::iterator E); + void findBaseConstants(); + void emitBaseConstants(Instruction *Base, Constant *Offset, + const ConstantUser &CU); + bool emitBaseConstants(); + void deleteDeadCastInst() const; + bool optimizeConstants(Function &F); }; } @@ -126,185 +193,37 @@ FunctionPass *llvm::createConstantHoistingPass() { } /// \brief Perform the constant hoisting optimization for the given function. 
-bool ConstantHoisting::runOnFunction(Function &F) { - DEBUG(dbgs() << "********** Constant Hoisting **********\n"); - DEBUG(dbgs() << "********** Function: " << F.getName() << '\n'); +bool ConstantHoisting::runOnFunction(Function &Fn) { + DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n"); + DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - TTI = &getAnalysis<TargetTransformInfo>(); + setup(Fn); - return OptimizeConstants(F); -} + bool MadeChange = optimizeConstants(Fn); -void ConstantHoisting::CollectConstant(User * U, unsigned Opcode, - Intrinsic::ID IID, ConstantInt *C) { - unsigned Cost; - if (Opcode) - Cost = TTI->getIntImmCost(Opcode, C->getValue(), C->getType()); - else - Cost = TTI->getIntImmCost(IID, C->getValue(), C->getType()); - - if (Cost > TargetTransformInfo::TCC_Basic) { - ConstantCandidate &CC = ConstantMap[C]; - CC.CumulativeCost += Cost; - CC.Uses.push_back(U); - DEBUG(dbgs() << "Collect constant " << *C << " with cost " << Cost - << " from " << *U << '\n'); + if (MadeChange) { + DEBUG(dbgs() << "********** Function after Constant Hoisting: " + << Fn.getName() << '\n'); + DEBUG(dbgs() << Fn); } + DEBUG(dbgs() << "********** End Constant Hoisting **********\n"); + + cleanup(); + + return MadeChange; } -/// \brief Scan the instruction or constant expression for expensive integer -/// constants and record them in the constant map. -void ConstantHoisting::CollectConstants(Instruction *I) { - unsigned Opcode = 0; - Intrinsic::ID IID = Intrinsic::not_intrinsic; - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) - IID = II->getIntrinsicID(); - else - Opcode = I->getOpcode(); - - // Scan all operands. 
- for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O) { - if (ConstantInt *C = dyn_cast<ConstantInt>(O)) { - CollectConstant(I, Opcode, IID, C); - continue; - } - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O)) { - // We only handle constant cast expressions. - if (!CE->isCast()) - continue; - - if (ConstantInt *C = dyn_cast<ConstantInt>(CE->getOperand(0))) { - // Ignore the cast expression and use the opcode of the instruction. - CollectConstant(CE, Opcode, IID, C); - continue; - } - } - } -} - -/// \brief Collect all integer constants in the function that cannot be folded -/// into an instruction itself. -void ConstantHoisting::CollectConstants(Function &F) { - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - CollectConstants(I); -} - -/// \brief Find the base constant within the given range and rebase all other -/// constants with respect to the base constant. -void ConstantHoisting::FindAndMakeBaseConstant(ConstantMapType::iterator S, - ConstantMapType::iterator E) { - ConstantMapType::iterator MaxCostItr = S; - unsigned NumUses = 0; - // Use the constant that has the maximum cost as base constant. - for (ConstantMapType::iterator I = S; I != E; ++I) { - NumUses += I->second.Uses.size(); - if (I->second.CumulativeCost > MaxCostItr->second.CumulativeCost) - MaxCostItr = I; - } - - // Don't hoist constants that have only one use. - if (NumUses <= 1) - return; - - ConstantInfo CI; - CI.BaseConstant = MaxCostItr->first; - Type *Ty = CI.BaseConstant->getType(); - // Rebase the constants with respect to the base constant. 
- for (ConstantMapType::iterator I = S; I != E; ++I) { - APInt Diff = I->first->getValue() - CI.BaseConstant->getValue(); - ConstantInfo::RebasedConstantInfo RCI; - RCI.OriginalConstant = I->first; - RCI.Offset = ConstantInt::get(Ty, Diff); - RCI.Uses = std::move(I->second.Uses); - CI.RebasedConstants.push_back(RCI); - } - Constants.push_back(CI); -} - -/// \brief Finds and combines constants that can be easily rematerialized with -/// an add from a common base constant. -void ConstantHoisting::FindBaseConstants() { - // Sort the constants by value and type. This invalidates the mapping. - std::sort(ConstantMap.begin(), ConstantMap.end(), - [](const std::pair<ConstantInt *, ConstantCandidate> &LHS, - const std::pair<ConstantInt *, ConstantCandidate> &RHS) { - if (LHS.first->getType() != RHS.first->getType()) - return LHS.first->getType()->getBitWidth() < - RHS.first->getType()->getBitWidth(); - return LHS.first->getValue().ult(RHS.first->getValue()); - }); - - // Simple linear scan through the sorted constant map for viable merge - // candidates. - ConstantMapType::iterator MinValItr = ConstantMap.begin(); - for (ConstantMapType::iterator I = std::next(ConstantMap.begin()), - E = ConstantMap.end(); I != E; ++I) { - if (MinValItr->first->getType() == I->first->getType()) { - // Check if the constant is in range of an add with immediate. - APInt Diff = I->first->getValue() - MinValItr->first->getValue(); - if ((Diff.getBitWidth() <= 64) && - TTI->isLegalAddImmediate(Diff.getSExtValue())) - continue; - } - // We either have now a different constant type or the constant is not in - // range of an add with immediate anymore. - FindAndMakeBaseConstant(MinValItr, I); - // Start a new base constant search. - MinValItr = I; - } - // Finalize the last base constant search. - FindAndMakeBaseConstant(MinValItr, ConstantMap.end()); -} - -/// \brief Records the basic block of the instruction or all basic blocks of the -/// users of the constant expression. 
-static void CollectBasicBlocks(SmallPtrSet<BasicBlock *, 4> &BBs, Function &F, - User *U) { - if (Instruction *I = dyn_cast<Instruction>(U)) - BBs.insert(I->getParent()); - else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) - // Find all users of this constant expression. - for (User *UU : CE->users()) - // Only record users that are instructions. We don't want to go down a - // nested constant expression chain. Also check if the instruction is even - // in the current function. - if (Instruction *I = dyn_cast<Instruction>(UU)) - if(I->getParent()->getParent() == &F) - BBs.insert(I->getParent()); -} - -/// \brief Find the instruction we should insert the constant materialization -/// before. -static Instruction *getMatInsertPt(Instruction *I, const DominatorTree *DT) { - if (!isa<PHINode>(I) && !isa<LandingPadInst>(I)) // Simple case. - return I; - - // We can't insert directly before a phi node or landing pad. Insert before - // the terminator of the dominating block. - assert(&I->getParent()->getParent()->getEntryBlock() != I->getParent() && - "PHI or landing pad in entry block!"); - BasicBlock *IDom = DT->getNode(I->getParent())->getIDom()->getBlock(); - return IDom->getTerminator(); -} - -/// \brief Find an insertion point that dominates all uses. -Instruction *ConstantHoisting:: -FindConstantInsertionPoint(Function &F, const ConstantInfo &CI) const { - BasicBlock *Entry = &F.getEntryBlock(); - +/// \brief Find nearest common dominator of all uses. +/// FIXME: Replace this with NearestCommonDominator once it is in common code. +BasicBlock * +ConstantHoisting::findIDomOfAllUses(const ConstantUseListType &Uses) const { // Collect all basic blocks. 
- SmallPtrSet<BasicBlock *, 4> BBs; - ConstantInfo::RebasedConstantListType::const_iterator RCI, RCE; - for (RCI = CI.RebasedConstants.begin(), RCE = CI.RebasedConstants.end(); - RCI != RCE; ++RCI) - for (SmallVectorImpl<User *>::const_iterator U = RCI->Uses.begin(), - E = RCI->Uses.end(); U != E; ++U) - CollectBasicBlocks(BBs, F, *U); + SmallPtrSet<BasicBlock *, 8> BBs; + for (auto const &U : Uses) + BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); if (BBs.count(Entry)) - return getMatInsertPt(&Entry->front(), DT); + return Entry; while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; @@ -312,111 +231,314 @@ FindConstantInsertionPoint(Function &F, const ConstantInfo &CI) const { BB2 = *std::next(BBs.begin()); BB = DT->findNearestCommonDominator(BB1, BB2); if (BB == Entry) - return getMatInsertPt(&Entry->front(), DT); + return Entry; + BBs.erase(BB1); + BBs.erase(BB2); + BBs.insert(BB); + } + assert((BBs.size() == 1) && "Expected only one element."); + return *BBs.begin(); +} + +/// \brief Find the constant materialization insertion point. +Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst, + unsigned Idx) const { + // The simple and common case. + if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst)) + return Inst; + + // We can't insert directly before a phi node or landing pad. Insert before + // the terminator of the incoming or dominating block. + assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!"); + if (Idx != ~0U && isa<PHINode>(Inst)) + return cast<PHINode>(Inst)->getIncomingBlock(Idx)->getTerminator(); + + BasicBlock *IDom = DT->getNode(Inst->getParent())->getIDom()->getBlock(); + return IDom->getTerminator(); +} + +/// \brief Find an insertion point that dominates all uses. +Instruction *ConstantHoisting:: +findConstantInsertionPoint(const ConstantInfo &ConstInfo) const { + assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); + // Collect all IDoms. 
+ SmallPtrSet<BasicBlock *, 8> BBs; + for (auto const &RCI : ConstInfo.RebasedConstants) + BBs.insert(getIDom(RCI)); + + assert(!BBs.empty() && "No dominators!?"); + + if (BBs.count(Entry)) + return &Entry->front(); + + while (BBs.size() >= 2) { + BasicBlock *BB, *BB1, *BB2; + BB1 = *BBs.begin(); + BB2 = *std::next(BBs.begin()); + BB = DT->findNearestCommonDominator(BB1, BB2); + if (BB == Entry) + return &Entry->front(); BBs.erase(BB1); BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); Instruction &FirstInst = (*BBs.begin())->front(); - return getMatInsertPt(&FirstInst, DT); + return findMatInsertPt(&FirstInst); +} + + +/// \brief Record constant integer ConstInt for instruction Inst at operand +/// index Idx. +/// +/// The operand at index Idx is not necessarily the constant integer itself. It +/// could also be a cast instruction or a constant expression that uses the +/// constant integer. +void ConstantHoisting::collectConstantCandidates(Instruction *Inst, + unsigned Idx, + ConstantInt *ConstInt) { + unsigned Cost; + // Ask the target about the cost of materializing the constant for the given + // instruction and operand index. + if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst)) + Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx, + ConstInt->getValue(), ConstInt->getType()); + else + Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(), + ConstInt->getType()); + + // Ignore cheap integer constants. 
+ if (Cost > TargetTransformInfo::TCC_Basic) { + ConstCandMapType::iterator Itr; + bool Inserted; + std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(ConstInt, 0)); + if (Inserted) { + ConstCandVec.push_back(ConstantCandidate(ConstInt)); + Itr->second = ConstCandVec.size() - 1; + } + ConstCandVec[Itr->second].addUser(Inst, Idx, Cost); + DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx))) + dbgs() << "Collect constant " << *ConstInt << " from " << *Inst + << " with cost " << Cost << '\n'; + else + dbgs() << "Collect constant " << *ConstInt << " indirectly from " + << *Inst << " via " << *Inst->getOperand(Idx) << " with cost " + << Cost << '\n'; + ); + } +} + +/// \brief Scan the instruction for expensive integer constants and record them +/// in the constant candidate vector. +void ConstantHoisting::collectConstantCandidates(Instruction *Inst) { + // Skip all cast instructions. They are visited indirectly later on. + if (Inst->isCast()) + return; + + // Can't handle inline asm. Skip it. + if (auto Call = dyn_cast<CallInst>(Inst)) + if (isa<InlineAsm>(Call->getCalledValue())) + return; + + // Scan all operands. + for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { + Value *Opnd = Inst->getOperand(Idx); + + // Visit constant integers. + if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) { + collectConstantCandidates(Inst, Idx, ConstInt); + continue; + } + + // Visit cast instructions that have constant integers. + if (auto CastInst = dyn_cast<Instruction>(Opnd)) { + // Only visit cast instructions, which have been skipped. All other + // instructions should have already been visited. + if (!CastInst->isCast()) + continue; + + if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) { + // Pretend the constant is directly used by the instruction and ignore + // the cast instruction. 
+ collectConstantCandidates(Inst, Idx, ConstInt); + continue; + } + } + + // Visit constant expressions that have constant integers. + if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) { + // Only visit constant cast expressions. + if (!ConstExpr->isCast()) + continue; + + if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) { + // Pretend the constant is directly used by the instruction and ignore + // the constant expression. + collectConstantCandidates(Inst, Idx, ConstInt); + continue; + } + } + } // end of for all operands +} + +/// \brief Collect all integer constants in the function that cannot be folded +/// into an instruction itself. +void ConstantHoisting::collectConstantCandidates(Function &Fn) { + for (Function::iterator BB : Fn) + for (BasicBlock::iterator I : *BB) + collectConstantCandidates(I); +} + +/// \brief Find the base constant within the given range and rebase all other +/// constants with respect to the base constant. +void ConstantHoisting::findAndMakeBaseConstant(ConstCandVecType::iterator S, + ConstCandVecType::iterator E) { + auto MaxCostItr = S; + unsigned NumUses = 0; + // Use the constant that has the maximum cost as base constant. + for (auto ConstCand = S; ConstCand != E; ++ConstCand) { + NumUses += ConstCand->Uses.size(); + if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) + MaxCostItr = ConstCand; + } + + // Don't hoist constants that have only one use. + if (NumUses <= 1) + return; + + ConstantInfo ConstInfo; + ConstInfo.BaseConstant = MaxCostItr->ConstInt; + Type *Ty = ConstInfo.BaseConstant->getType(); + + // Rebase the constants with respect to the base constant. + for (auto ConstCand = S; ConstCand != E; ++ConstCand) { + APInt Diff = ConstCand->ConstInt->getValue() - + ConstInfo.BaseConstant->getValue(); + Constant *Offset = Diff == 0 ? 
nullptr : ConstantInt::get(Ty, Diff); + ConstInfo.RebasedConstants.push_back( + RebasedConstantInfo(std::move(ConstCand->Uses), Offset)); + } + ConstantVec.push_back(ConstInfo); +} + +/// \brief Finds and combines constant candidates that can be easily +/// rematerialized with an add from a common base constant. +void ConstantHoisting::findBaseConstants() { + // Sort the constants by value and type. This invalidates the mapping! + std::sort(ConstCandVec.begin(), ConstCandVec.end(), + [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) { + if (LHS.ConstInt->getType() != RHS.ConstInt->getType()) + return LHS.ConstInt->getType()->getBitWidth() < + RHS.ConstInt->getType()->getBitWidth(); + return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue()); + }); + + // Simple linear scan through the sorted constant candidate vector for viable + // merge candidates. + auto MinValItr = ConstCandVec.begin(); + for (auto CC = std::next(ConstCandVec.begin()), E = ConstCandVec.end(); + CC != E; ++CC) { + if (MinValItr->ConstInt->getType() == CC->ConstInt->getType()) { + // Check if the constant is in range of an add with immediate. + APInt Diff = CC->ConstInt->getValue() - MinValItr->ConstInt->getValue(); + if ((Diff.getBitWidth() <= 64) && + TTI->isLegalAddImmediate(Diff.getSExtValue())) + continue; + } + // We either have now a different constant type or the constant is not in + // range of an add with immediate anymore. + findAndMakeBaseConstant(MinValItr, CC); + // Start a new base constant search. + MinValItr = CC; + } + // Finalize the last base constant search. + findAndMakeBaseConstant(MinValItr, ConstCandVec.end()); } /// \brief Emit materialization code for all rebased constants and update their /// users. 
-void ConstantHoisting::EmitBaseConstants(Function &F, User *U, - Instruction *Base, Constant *Offset, - ConstantInt *OriginalConstant) { - if (Instruction *I = dyn_cast<Instruction>(U)) { - Instruction *Mat = Base; - if (!Offset->isNullValue()) { - Mat = BinaryOperator::Create(Instruction::Add, Base, Offset, - "const_mat", getMatInsertPt(I, DT)); +void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset, + const ConstantUser &CU) { + Instruction *Mat = Base; + if (Offset) { + Instruction *InsertionPt = findMatInsertPt(CU.Inst, CU.OpndIdx); + Mat = BinaryOperator::Create(Instruction::Add, Base, Offset, + "const_mat", InsertionPt); - // Use the same debug location as the instruction we are about to update. - Mat->setDebugLoc(I->getDebugLoc()); + DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0) + << " + " << *Offset << ") in BB " + << Mat->getParent()->getName() << '\n' << *Mat << '\n'); + Mat->setDebugLoc(CU.Inst->getDebugLoc()); + } + Value *Opnd = CU.Inst->getOperand(CU.OpndIdx); - DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0) - << " + " << *Offset << ") in BB " - << I->getParent()->getName() << '\n' << *Mat << '\n'); - } - DEBUG(dbgs() << "Update: " << *I << '\n'); - I->replaceUsesOfWith(OriginalConstant, Mat); - DEBUG(dbgs() << "To: " << *I << '\n'); + // Visit constant integer. + if (isa<ConstantInt>(Opnd)) { + DEBUG(dbgs() << "Update: " << *CU.Inst << '\n'); + CU.Inst->setOperand(CU.OpndIdx, Mat); + DEBUG(dbgs() << "To : " << *CU.Inst << '\n'); return; } - assert(isa<ConstantExpr>(U) && "Expected a ConstantExpr."); - ConstantExpr *CE = cast<ConstantExpr>(U); - SmallVector<std::pair<Instruction *, Instruction *>, 8> WorkList; - DEBUG(dbgs() << "Visit ConstantExpr " << *CE << '\n'); - for (User *UU : CE->users()) { - DEBUG(dbgs() << "Check user "; UU->print(dbgs()); dbgs() << '\n'); - // We only handel instructions here and won't walk down a ConstantExpr chain - // to replace all ConstExpr with instructions. 
- if (Instruction *I = dyn_cast<Instruction>(UU)) { - // Only update constant expressions in the current function. - if (I->getParent()->getParent() != &F) { - DEBUG(dbgs() << "Not in the same function - skip.\n"); - continue; - } - Instruction *Mat = Base; - Instruction *InsertBefore = getMatInsertPt(I, DT); - if (!Offset->isNullValue()) { - Mat = BinaryOperator::Create(Instruction::Add, Base, Offset, - "const_mat", InsertBefore); - - // Use the same debug location as the instruction we are about to - // update. - Mat->setDebugLoc(I->getDebugLoc()); - - DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0) - << " + " << *Offset << ") in BB " - << I->getParent()->getName() << '\n' << *Mat << '\n'); - } - Instruction *ICE = CE->getAsInstruction(); - ICE->replaceUsesOfWith(OriginalConstant, Mat); - ICE->insertBefore(InsertBefore); - - // Use the same debug location as the instruction we are about to update. - ICE->setDebugLoc(I->getDebugLoc()); - - WorkList.push_back(std::make_pair(I, ICE)); - } else { - DEBUG(dbgs() << "Not an instruction - skip.\n"); + // Visit cast instruction. + if (auto CastInst = dyn_cast<Instruction>(Opnd)) { + assert(CastInst->isCast() && "Expected an cast instruction!"); + // Check if we already have visited this cast instruction before to avoid + // unnecessary cloning. + Instruction *&ClonedCastInst = ClonedCastMap[CastInst]; + if (!ClonedCastInst) { + ClonedCastInst = CastInst->clone(); + ClonedCastInst->setOperand(0, Mat); + ClonedCastInst->insertAfter(CastInst); + // Use the same debug location as the original cast instruction. 
+ ClonedCastInst->setDebugLoc(CastInst->getDebugLoc()); + DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n' + << "To : " << *CastInst << '\n'); } + + DEBUG(dbgs() << "Update: " << *CU.Inst << '\n'); + CU.Inst->setOperand(CU.OpndIdx, ClonedCastInst); + DEBUG(dbgs() << "To : " << *CU.Inst << '\n'); + return; } - SmallVectorImpl<std::pair<Instruction *, Instruction *> >::iterator I, E; - for (I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { - DEBUG(dbgs() << "Create instruction: " << *I->second << '\n'); - DEBUG(dbgs() << "Update: " << *I->first << '\n'); - I->first->replaceUsesOfWith(CE, I->second); - DEBUG(dbgs() << "To: " << *I->first << '\n'); + + // Visit constant expression. + if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) { + Instruction *ConstExprInst = ConstExpr->getAsInstruction(); + ConstExprInst->setOperand(0, Mat); + ConstExprInst->insertBefore(findMatInsertPt(CU.Inst, CU.OpndIdx)); + + // Use the same debug location as the instruction we are about to update. + ConstExprInst->setDebugLoc(CU.Inst->getDebugLoc()); + + DEBUG(dbgs() << "Create instruction: " << *ConstExprInst << '\n' + << "From : " << *ConstExpr << '\n'); + DEBUG(dbgs() << "Update: " << *CU.Inst << '\n'); + CU.Inst->setOperand(CU.OpndIdx, ConstExprInst); + DEBUG(dbgs() << "To : " << *CU.Inst << '\n'); + return; } } /// \brief Hoist and hide the base constant behind a bitcast and emit /// materialization code for derived constants. -bool ConstantHoisting::EmitBaseConstants(Function &F) { +bool ConstantHoisting::emitBaseConstants() { bool MadeChange = false; - SmallVectorImpl<ConstantInfo>::iterator CI, CE; - for (CI = Constants.begin(), CE = Constants.end(); CI != CE; ++CI) { + for (auto const &ConstInfo : ConstantVec) { // Hoist and hide the base constant behind a bitcast. 
- Instruction *IP = FindConstantInsertionPoint(F, *CI); - IntegerType *Ty = CI->BaseConstant->getType(); - Instruction *Base = new BitCastInst(CI->BaseConstant, Ty, "const", IP); - DEBUG(dbgs() << "Hoist constant (" << *CI->BaseConstant << ") to BB " - << IP->getParent()->getName() << '\n'); + Instruction *IP = findConstantInsertionPoint(ConstInfo); + IntegerType *Ty = ConstInfo.BaseConstant->getType(); + Instruction *Base = + new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); + DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB " + << IP->getParent()->getName() << '\n' << *Base << '\n'); NumConstantsHoisted++; // Emit materialization code for all rebased constants. - ConstantInfo::RebasedConstantListType::iterator RCI, RCE; - for (RCI = CI->RebasedConstants.begin(), RCE = CI->RebasedConstants.end(); - RCI != RCE; ++RCI) { + for (auto const &RCI : ConstInfo.RebasedConstants) { NumConstantsRebased++; - for (SmallVectorImpl<User *>::iterator U = RCI->Uses.begin(), - E = RCI->Uses.end(); U != E; ++U) - EmitBaseConstants(F, *U, Base, RCI->Offset, RCI->OriginalConstant); + for (auto const &U : RCI.Uses) + emitBaseConstants(Base, RCI.Offset, U); } // Use the same debug location as the last user of the constant. @@ -432,27 +554,37 @@ bool ConstantHoisting::EmitBaseConstants(Function &F) { return MadeChange; } +/// \brief Check all cast instructions we made a copy of and erase them if they +/// have no more users. Erasing (not just unlinking) avoids leaking them. +void ConstantHoisting::deleteDeadCastInst() const { + for (auto const &I : ClonedCastMap) + if (I.first->use_empty()) + I.first->eraseFromParent(); +} + /// \brief Optimize expensive integer constants in the given function. -bool ConstantHoisting::OptimizeConstants(Function &F) { - bool MadeChange = false; - +bool ConstantHoisting::optimizeConstants(Function &Fn) { // Collect all constant candidates. - CollectConstants(F); + collectConstantCandidates(Fn); - // There are no constants to worry about. 
- if (ConstantMap.empty()) - return MadeChange; + // There are no constant candidates to worry about. + if (ConstCandVec.empty()) + return false; // Combine constants that can be easily materialized with an add from a common // base constant. - FindBaseConstants(); + findBaseConstants(); + + // There are no constants to emit. + if (ConstantVec.empty()) + return false; // Finally hoist the base constant and emit materializating code for dependent // constants. - MadeChange |= EmitBaseConstants(F); + bool MadeChange = emitBaseConstants(); - ConstantMap.clear(); - Constants.clear(); + // Cleanup dead instructions. + deleteDeadCastInst(); return MadeChange; } diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll index d4a7ac7da12..8ea3c53de41 100644 --- a/test/CodeGen/X86/lsr-interesting-step.ll +++ b/test/CodeGen/X86/lsr-interesting-step.ll @@ -3,26 +3,24 @@ ; The inner loop should require only one add (and no leas either). ; rdar://8100380 -; CHECK: BB0_3: -; CHECK-NEXT: movb $0, flags(%rdx) -; CHECK-NEXT: addq %rax, %rdx -; CHECK-NEXT: cmpq $8192, %rdx +; CHECK: BB0_2: +; CHECK-NEXT: movb $0, flags(%rcx) +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: cmpq $8192, %rcx ; CHECK-NEXT: jl @flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1] define void @foo() nounwind { entry: - %tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1] - br i1 %tmp, label %bb, label %bb21 + br label %bb bb: ; preds = %entry br label %bb7 bb7: ; preds = %bb, %bb17 %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2] - %tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1] - br i1 %tmp9, label %bb10, label %bb17 + br label %bb10 bb10: ; preds = %bb7 br label %bb11 diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll index 92850f22eaa..c961bd091b9 100644 --- a/test/CodeGen/X86/negate-add-zero.ll +++ b/test/CodeGen/X86/negate-add-zero.ll @@ -827,9 +827,7 @@ declare void 
@_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13 declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*) define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) { -entry: - %0 = add i32 0, -1 ; <i32> [#uses=1] - %1 = getelementptr double* null, i32 %0 ; <double*> [#uses=1] + %1 = getelementptr double* null, i32 -1 ; <double*> [#uses=1] %2 = load double* %1, align 8 ; <double> [#uses=1] %3 = load double* null, align 8 ; <double> [#uses=2] %4 = load double* null, align 8 ; <double> [#uses=2] @@ -890,13 +888,12 @@ entry: store double %52, double* %55, align 8 %56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2 ; <double*> [#uses=1] store double %53, double* %56, align 8 - %57 = add i32 0, 4 ; <i32> [#uses=1] - %58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0 ; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1] - store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %58, align 8 - %59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1 ; <i32*> [#uses=1] - store i32 %57, i32* %59, align 4 - %60 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2 ; <i32*> [#uses=1] - store i32 3, i32* %60, align 8 + %57 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0 ; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1] + store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %57, align 8 + %58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1 ; <i32*> [#uses=1] + store i32 4, i32* %58, 
align 4 + %59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2 ; <i32*> [#uses=1] + store i32 3, i32* %59, align 8 unreachable } diff --git a/test/Transforms/ConstantHoisting/X86/phi.ll b/test/Transforms/ConstantHoisting/X86/phi.ll index cc2fdda40e7..7134723f61a 100644 --- a/test/Transforms/ConstantHoisting/X86/phi.ll +++ b/test/Transforms/ConstantHoisting/X86/phi.ll @@ -19,11 +19,11 @@ return: ret i8* %retval.0 ; CHECK-LABEL: @test1 -; CHECK: entry: -; CHECK: %const_mat = add i64 %const, 1 -; CHECK-NEXT: %1 = inttoptr i64 %const_mat to i8* -; CHECK-NEXT: br i1 %cmp -; CHECK: %retval.0 = phi i8* [ null, %entry ], [ %1, %if.end ] +; CHECK: if.end: +; CHECK: %2 = inttoptr i64 %const to i8* +; CHECK-NEXT: br +; CHECK: return: +; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ] } define void @test2(i1 %cmp, i64** %tmp) {