diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index bd25f42d9b0..bf6af970b6f 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -14,12 +14,14 @@ #define DEBUG_TYPE "delay-slot-filler" #include "Mips.h" +#include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -66,6 +68,24 @@ static cl::opt DisableBackwardSearch( cl::Hidden); namespace { + typedef MachineBasicBlock::iterator Iter; + typedef MachineBasicBlock::reverse_iterator ReverseIter; + typedef SmallDenseMap BB2BrMap; + + /// \brief A functor comparing edge weight of two blocks. + struct CmpWeight { + CmpWeight(const MachineBasicBlock &S, + const MachineBranchProbabilityInfo &P) : Src(S), Prob(P) {} + + bool operator()(const MachineBasicBlock *Dst0, + const MachineBasicBlock *Dst1) const { + return Prob.getEdgeWeight(&Src, Dst0) < Prob.getEdgeWeight(&Src, Dst1); + } + + const MachineBasicBlock &Src; + const MachineBranchProbabilityInfo &Prob; + }; + class RegDefsUses { public: RegDefsUses(TargetMachine &TM); @@ -74,6 +94,14 @@ namespace { /// This function sets all caller-saved registers in Defs. void setCallerSaved(const MachineInstr &MI); + /// This function sets all unallocatable registers in Defs. + void setUnallocatableRegs(const MachineFunction &MF); + + /// Set bits in Uses corresponding to MBB's live-out registers except for + /// the registers that are live-in to SuccBB. 
+ void addLiveOut(const MachineBasicBlock &MBB, + const MachineBasicBlock &SuccBB); + bool update(const MachineInstr &MI, unsigned Begin, unsigned End); private: @@ -90,14 +118,41 @@ namespace { /// Base class for inspecting loads and stores. class InspectMemInstr { public: - virtual bool hasHazard(const MachineInstr &MI) = 0; + InspectMemInstr(bool ForbidMemInstr_) + : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false), + SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {} + + /// Return true if MI cannot be moved to delay slot. + bool hasHazard(const MachineInstr &MI); + virtual ~InspectMemInstr() {} + + protected: + /// Flags indicating whether loads or stores have been seen. + bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore; + + /// Memory instructions are not allowed to move to delay slot if this flag + /// is true. + bool ForbidMemInstr; + + private: + virtual bool hasHazard_(const MachineInstr &MI) = 0; }; /// This subclass rejects any memory instructions. class NoMemInstr : public InspectMemInstr { public: - virtual bool hasHazard(const MachineInstr &MI); + NoMemInstr() : InspectMemInstr(true) {} + private: + virtual bool hasHazard_(const MachineInstr &MI) { return true; } + }; + + /// This subclass accepts loads from stacks and constant loads. + class LoadFromStackOrConst : public InspectMemInstr { + public: + LoadFromStackOrConst() : InspectMemInstr(false) {} + private: + virtual bool hasHazard_(const MachineInstr &MI); }; /// This subclass uses memory dependence information to determine whether a @@ -106,10 +161,9 @@ namespace { public: MemDefsUses(const MachineFrameInfo *MFI); - /// Return true if MI cannot be moved to delay slot. - virtual bool hasHazard(const MachineInstr &MI); - private: + virtual bool hasHazard_(const MachineInstr &MI); + /// Update Defs and Uses. Return true if there exist dependences that /// disqualify the delay slot candidate between V and values in Uses and Defs. 
bool updateDefsUses(const Value *V, bool MayStore); @@ -121,16 +175,9 @@ namespace { const MachineFrameInfo *MFI; SmallPtrSet Uses, Defs; - /// Flags indicating whether loads or stores have been seen. - bool SeenLoad, SeenStore; - /// Flags indicating whether loads or stores with no underlying objects have /// been seen. bool SeenNoObjLoad, SeenNoObjStore; - - /// Memory instructions are not allowed to move to delay slot if this flag - /// is true. - bool ForbidMemInstr; }; class Filler : public MachineFunctionPass { @@ -153,10 +200,12 @@ namespace { return Changed; } - private: - typedef MachineBasicBlock::iterator Iter; - typedef MachineBasicBlock::reverse_iterator ReverseIter; + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: bool runOnMachineBasicBlock(MachineBasicBlock &MBB); /// This function checks if it is valid to move Candidate to the delay slot @@ -179,6 +228,26 @@ namespace { /// that can be moved to the delay slot. Returns true on success. bool searchForward(MachineBasicBlock &MBB, Iter Slot) const; + /// This function searches MBB's successor blocks for an instruction that + /// can be moved to the delay slot and inserts clones of the instruction into + /// the successor blocks. + bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const; + + /// Pick a successor block of MBB. Return NULL if MBB doesn't have a successor + /// block that is not a landing pad. + MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const; + + /// This function analyzes MBB and returns an instruction with an unoccupied + /// slot that branches to Dst. + std::pair + getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const; + + /// Examine Pred and see if it is possible to insert an instruction into + /// one of its branches delay slot or its end. 
+ bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, + RegDefsUses &RegDU, bool &HasMultipleSuccs, + BB2BrMap &BrMap) const; + bool terminateSearch(const MachineInstr &Candidate) const; TargetMachine &TM; @@ -189,6 +258,45 @@ namespace { char Filler::ID = 0; } // end of anonymous namespace +static bool hasUnoccupiedSlot(const MachineInstr *MI) { + return MI->hasDelaySlot() && !MI->isBundledWithSucc(); +} + +/// This function inserts clones of Filler into predecessor blocks. +static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) { + MachineFunction *MF = Filler->getParent()->getParent(); + + for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) { + if (I->second) { + MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler)); + ++UsefulSlots; + } else { + I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler)); + } + } +} + +/// This function adds registers Filler defines to MBB's live-in register list. +static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) { + for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = Filler->getOperand(I); + unsigned R; + + if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg())) + continue; + +#ifndef NDEBUG + const MachineFunction &MF = *MBB.getParent(); + assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) && + "Shouldn't move an instruction with unallocatable registers across " + "basic block boundaries."); +#endif + + if (!MBB.isLiveIn(R)) + MBB.addLiveIn(R); + } +} + RegDefsUses::RegDefsUses(TargetMachine &TM) : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {} @@ -226,6 +334,29 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) { Defs |= CallerSavedRegs; } +void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { + BitVector AllocSet = TRI.getAllocatableSet(MF); + + for (int R = AllocSet.find_first(); R != -1; R = 
AllocSet.find_next(R)) + for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) + AllocSet.set(*AI); + + AllocSet.set(Mips::ZERO); + AllocSet.set(Mips::ZERO_64); + + Defs |= AllocSet.flip(); +} + +void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB, + const MachineBasicBlock &SuccBB) { + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + if (*SI != &SuccBB) + for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(), + LE = (*SI)->livein_end(); LI != LE; ++LI) + Uses.set(*LI); +} + bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); bool HasHazard = false; @@ -264,24 +395,15 @@ bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { return false; } -bool NoMemInstr::hasHazard(const MachineInstr &MI) { - // Return true if MI accesses memory. - return (MI.mayStore() || MI.mayLoad()); -} - -MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) - : MFI(MFI_), SeenLoad(false), SeenStore(false), SeenNoObjLoad(false), - SeenNoObjStore(false), ForbidMemInstr(false) {} - -bool MemDefsUses::hasHazard(const MachineInstr &MI) { +bool InspectMemInstr::hasHazard(const MachineInstr &MI) { if (!MI.mayStore() && !MI.mayLoad()) return false; if (ForbidMemInstr) return true; - bool OrigSeenLoad = SeenLoad, OrigSeenStore = SeenStore; - + OrigSeenLoad = SeenLoad; + OrigSeenStore = SeenStore; SeenLoad |= MI.mayLoad(); SeenStore |= MI.mayStore(); @@ -292,6 +414,33 @@ bool MemDefsUses::hasHazard(const MachineInstr &MI) { return true; } + return hasHazard_(MI); +} + +bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { + if (MI.mayStore()) + return true; + + if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue()) + return true; + + const Value *V = (*MI.memoperands_begin())->getValue(); + + if (isa(V)) + return false; + + if (const PseudoSourceValue *PSV = dyn_cast(V)) + 
return !PSV->PseudoSourceValue::isConstant(0) && + (V != PseudoSourceValue::getStack()); + + return true; +} + +MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) + : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false), + SeenNoObjStore(false) {} + +bool MemDefsUses::hasHazard_(const MachineInstr &MI) { bool HasHazard = false; SmallVector Objs; @@ -353,16 +502,24 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (Iter I = MBB.begin(); I != MBB.end(); ++I) { - if (!I->hasDelaySlot()) + if (!hasUnoccupiedSlot(&*I)) continue; ++FilledSlots; Changed = true; // Delay slot filling is disabled at -O0. - if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && - (searchBackward(MBB, I) || searchForward(MBB, I))) - continue; + if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) { + if (searchBackward(MBB, I)) + continue; + + if (I->isTerminator()) { + if (searchSuccBBs(MBB, I)) + continue; + } else if (searchForward(MBB, I)) { + continue; + } + } // Bundle the NOP to the instruction with the delay slot. BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); @@ -404,6 +561,9 @@ bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, } bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const { + if (DisableBackwardSearch) + return false; + RegDefsUses RegDU(TM); MemDefsUses MemDU(MBB.getParent()->getFrameInfo()); ReverseIter Filler; @@ -422,7 +582,7 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const { bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const { // Can handle only calls. 
- if (!Slot->isCall()) + if (DisableForwardSearch || !Slot->isCall()) return false; RegDefsUses RegDU(TM); @@ -441,6 +601,117 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const { return false; } +bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const { + if (DisableSuccBBSearch) + return false; + + MachineBasicBlock *SuccBB = selectSuccBB(MBB); + + if (!SuccBB) + return false; + + RegDefsUses RegDU(TM); + bool HasMultipleSuccs = false; + BB2BrMap BrMap; + OwningPtr IM; + Iter Filler; + + // Iterate over SuccBB's predecessor list. + for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(), + PE = SuccBB->pred_end(); PI != PE; ++PI) + if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap)) + return false; + + // Do not allow moving instructions which have unallocatable register operands + // across basic block boundaries. + RegDU.setUnallocatableRegs(*MBB.getParent()); + + // Only allow moving loads from stack or constants if any of the SuccBB's + // predecessors have multiple successors. + if (HasMultipleSuccs) { + IM.reset(new LoadFromStackOrConst()); + } else { + const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo(); + IM.reset(new MemDefsUses(MFI)); + } + + if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler)) + return false; + + insertDelayFiller(Filler, BrMap); + addLiveInRegs(Filler, *SuccBB); + Filler->eraseFromParent(); + + return true; +} + +MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const { + if (B.succ_empty()) + return NULL; + + // Select the successor with the largest edge weight. + CmpWeight Cmp(B, getAnalysis()); + MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(), Cmp); + return S->isLandingPad() ? 
NULL : S; +} + +std::pair +Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const { + const MipsInstrInfo *TII = + static_cast(TM.getInstrInfo()); + MachineBasicBlock *TrueBB = 0, *FalseBB = 0; + SmallVector BranchInstrs; + SmallVector Cond; + + MipsInstrInfo::BranchType R = + TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs); + + if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch)) + return std::make_pair(R, (MachineInstr*)NULL); + + if (R != MipsInstrInfo::BT_CondUncond) { + if (!hasUnoccupiedSlot(BranchInstrs[0])) + return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL); + + assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst))); + + return std::make_pair(R, BranchInstrs[0]); + } + + assert((TrueBB == &Dst) || (FalseBB == &Dst)); + + // Examine the conditional branch. See if its slot is occupied. + if (hasUnoccupiedSlot(BranchInstrs[0])) + return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]); + + // If that fails, try the unconditional branch. + if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst)) + return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]); + + return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL); +} + +bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, + RegDefsUses &RegDU, bool &HasMultipleSuccs, + BB2BrMap &BrMap) const { + std::pair P = + getBranch(Pred, Succ); + + // Return if either getBranch wasn't able to analyze the branches or there + // were no branches with unoccupied slots. 
+ if (P.first == MipsInstrInfo::BT_None) + return false; + + if ((P.first != MipsInstrInfo::BT_Uncond) && + (P.first != MipsInstrInfo::BT_NoBranch)) { + HasMultipleSuccs = true; + RegDU.addLiveOut(Pred, Succ); + } + + BrMap[&Pred] = P.second; + return true; +} + bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU, InspectMemInstr &IM) const { bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill()); diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll index 5e51abe8fe2..9685e7b95df 100644 --- a/test/CodeGen/Mips/brdelayslot.ll +++ b/test/CodeGen/Mips/brdelayslot.ll @@ -4,6 +4,9 @@ ; RUN: FileCheck %s -check-prefix=STATICO1 ; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \ ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD +; RUN: llc -march=mipsel -disable-mips-df-backward-search \ +; RUN: -disable-mips-df-succbb-search=false < %s | \ +; RUN: FileCheck %s -check-prefix=SUCCBB define void @foo1() nounwind { entry: @@ -75,6 +78,7 @@ if.end: ; ; Default: foo6: ; Default-NOT: nop +; Default: .end foo6 define void @foo6(float %a0, double %a1) nounwind { entry: @@ -109,6 +113,7 @@ entry: ; FORWARD: jal foo11 ; FORWARD: jal foo11 ; FORWARD-NOT: nop +; FORWARD: end foo10 define void @foo10() nounwind { entry: @@ -121,3 +126,54 @@ entry: } declare void @foo11() + +; Check that delay slots of branches in both the entry block and loop body are +; filled. 
+; +; SUCCBB: succbbs_loop1: +; SUCCBB: bne ${{[0-9]+}}, $zero, $BB +; SUCCBB-NEXT: addiu +; SUCCBB: bne ${{[0-9]+}}, $zero, $BB +; SUCCBB-NEXT: addiu + +define i32 @succbbs_loop1(i32* nocapture %a, i32 %n) { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %s.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i32 %i.05 + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %s.06 + %inc = add nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %s.0.lcssa +} + +; Check that the first branch has its slot filled. +; +; SUCCBB: succbbs_br1: +; SUCCBB: beq ${{[0-9]+}}, $zero, $BB +; SUCCBB-NEXT: lw $25, %call16(foo100) + +define void @succbbs_br1(i32 %a) { +entry: + %tobool = icmp eq i32 %a, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @foo100() #1 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +declare void @foo100() +