diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 21675dcbac8..90608f16ac1 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -321,6 +321,12 @@ public: assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!"); return 0; } + + /// ReplaceTailWithBranchTo - Delete the instruction Tail and everything + /// after it, replacing it with an unconditional branch to NewDest. This is + /// used by the tail merging pass. + virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const = 0; /// copyRegToReg - Emit instructions to copy between a pair of registers. It /// returns false if the target does not how to copy between the specified @@ -562,6 +568,13 @@ public: return true; } + /// isSchedulingBoundary - Test if the given instruction should be + /// considered a scheduling boundary. This primarily includes labels and + /// terminators. + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const = 0; + /// GetInstSize - Returns the size of the specified Instruction. 
/// virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const { @@ -595,6 +608,8 @@ protected: TargetInstrInfoImpl(const TargetInstrDesc *desc, unsigned NumOpcodes) : TargetInstrInfo(desc, NumOpcodes) {} public: + virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest) const; virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI = false) const; virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, @@ -610,6 +625,9 @@ public: MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1) const; + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const; virtual ScheduleHazardRecognizer * diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index e440e40f05a..e17e47a662d 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -358,24 +358,10 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything -/// after it, replacing it with an unconditional branch to NewDest. This -/// returns true if OldInst's block is modified, false if NewDest is modified. +/// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { - MachineBasicBlock *OldBB = OldInst->getParent(); - - // Remove all the old successors of OldBB from the CFG. - while (!OldBB->succ_empty()) - OldBB->removeSuccessor(OldBB->succ_begin()); - - // Remove all the dead instructions from the end of OldBB. - OldBB->erase(OldInst, OldBB->end()); - - // If OldBB isn't immediately before OldBB, insert a branch to it. 
- if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest)) - TII->InsertBranch(*OldBB, NewDest, 0, SmallVector(), - OldInst->getDebugLoc()); - OldBB->addSuccessor(NewDest); + TII->ReplaceTailWithBranchTo(OldInst, NewDest); ++NumTailMerge; } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 36e6fc727bf..16d324735c8 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -235,6 +235,12 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); if (!TII) return false; + // Tail merging tends to expose more if-conversion opportunities. + BranchFolder BF(true); + bool BFChange = BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); + DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" << MF.getFunction()->getName() << "\'"); @@ -376,6 +382,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { getAnalysisIfAvailable()); } + MadeChange |= BFChange; return MadeChange; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 8baf01c9073..2297c908b1e 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -390,7 +390,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().getDesc().isBarrier()) { + if (!MBB->empty() && MBB->back().getDesc().isBarrier() && + !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f3c725da9e6..4af8e07f348 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -79,6 +79,7 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class 
PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; + const TargetInstrInfo *TII; CodeGenOpt::Level OptLevel; public: @@ -181,30 +182,9 @@ namespace { }; } -/// isSchedulingBoundary - Test if the given instruction should be -/// considered a scheduling boundary. This primarily includes labels -/// and terminators. -/// -static bool isSchedulingBoundary(const MachineInstr *MI, - const MachineFunction &MF) { - // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) - return true; - - // Don't attempt to schedule around any instruction that defines - // a stack-oriented pointer, as it's unlikely to be profitable. This - // saves compile time, because it doesn't require every single - // stack slot reference to depend on the instruction that does the - // modification. - const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) - return true; - - return false; -} - bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { AA = &getAnalysis(); + TII = Fn.getTarget().getInstrInfo(); // Check for explicit enable/disable of post-ra scheduling. 
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -265,8 +245,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator Current = MBB->end(); unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { - MachineInstr *MI = prior(I); - if (isSchedulingBoundary(MI, Fn)) { + MachineInstr *MI = llvm::prior(I); + if (TII->isSchedulingBoundary(MI, MBB, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); Scheduler.EmitSchedule(); Current = MI; diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 1641c7162ab..53f3ee84910 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallVector.h" @@ -27,6 +28,25 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +void +TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + // Save the debug loc now; the erase below destroys Tail's instruction. + DebugLoc DL = Tail->getDebugLoc(); + // Remove all the old successors of MBB from the CFG. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_begin()); + + // Remove all the dead instructions from the end of MBB. + MBB->erase(Tail, MBB->end()); + + // If MBB isn't immediately before NewDest, insert a branch to it. + if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) + InsertBranch(*MBB, NewDest, 0, SmallVector(), DL); + MBB->addSuccessor(NewDest); +} + // commuteInstruction - The default implementation of this method just exchanges // the two operands returned by findCommutedOpIndices. 
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, @@ -316,6 +336,28 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return true; } +/// isSchedulingBoundary - Test if the given instruction should be +/// considered a scheduling boundary. This primarily includes labels +/// and terminators. +bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const{ + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); + if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) + return true; + + return false; +} + // Default implementation of CreateTargetPostRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfoImpl:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6eb3fab3a28..8d3ad8095ec 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1306,6 +1306,34 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } +bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Terminators and labels can't be scheduled around. 
+ if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Treat the start of the IT block as a scheduling boundary, but schedule + // t2IT along with all instructions following it. + // FIXME: This is a big hammer. But the alternative is to add all potential + // true and anti dependencies to IT block instructions as implicit operands + // to the t2IT instruction. The added compile time and complexity does not + // seem worth it. + MachineBasicBlock::const_iterator I = MI; + if (++I != MBB->end() && I->getOpcode() == ARM::t2IT) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + if (MI->definesRegister(ARM::SP)) + return true; + + return false; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. 
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index fec26e197c4..bc82e145b39 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -319,6 +319,10 @@ public: virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1) const; + + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; }; static inline diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0172bb47b9e..c2cb05e532a 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2528,6 +2528,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // IT block +let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, Size2Bytes, IIC_iALUx, "it$mask\t$cc", "", []> { diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 01342763559..7e57a1ca557 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; + /// HasITBlocks - True if IT blocks have been inserted. 
+ bool HasITBlocks; + public: ARMFunctionInfo() : isThumb(false), @@ -97,7 +100,8 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget().isThumb()), @@ -108,7 +112,8 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -229,6 +234,9 @@ public: int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + bool hasITBlocks() const { return HasITBlocks; } + void setHasITBlocks(bool h) { HasITBlocks = h; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b9417e353dc..e41d9fc26a7 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -197,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; } // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; - -def FPSCR : ARMReg<1, "fpscr">; +def CPSR : ARMReg<0, "cpsr">; +def FPSCR : ARMReg<1, "fpscr">; +def ITSTATE : ARMReg<2, "itstate">; // Register classes. // @@ -557,4 +557,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], // Condition code registers. 
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; - diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 2101da88048..06c893a6d84 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -131,8 +131,10 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, PM.add(createARMExpandPseudoPass()); if (EarlyIfConvert && OptLevel != CodeGenOpt::None) { - if (!Subtarget.isThumb1Only()) + if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); + if (Subtarget.isThumb2()) + PM.add(createThumb2ITBlockPass()); } return true; @@ -146,7 +148,8 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, } if (Subtarget.isThumb2()) { - PM.add(createThumb2ITBlockPass()); + if (!EarlyIfConvert) + PM.add(createThumb2ITBlockPass()); PM.add(createThumb2SizeReductionPass()); } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index bd8be969926..d72bb5d731a 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -31,6 +31,7 @@ namespace { MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; ARMFunctionInfo *AFI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -52,6 +53,10 @@ namespace { SmallVector &LastUses); bool InsertITBlock(MachineInstr *First, MachineInstr *Last); bool InsertITBlocks(MachineBasicBlock &MBB); + bool MoveCopyOutOfITBlock(MachineInstr *MI, + ARMCC::CondCodes CC, ARMCC::CondCodes OCC, + SmallSet &Defs, + SmallSet &Uses); bool InsertITInstructions(MachineBasicBlock &MBB); }; char Thumb2ITBlockPass::ID = 0; @@ -249,20 +254,77 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { return Modified; } -static void TrackDefUses(MachineInstr *MI, SmallSet &Defs, - SmallSet &Uses) { +/// TrackDefUses - Tracking what registers are being defined and used by +/// instructions in the IT block. 
This also tracks "dependencies", i.e. uses +/// in the IT block that are defined before the IT instruction. +static void TrackDefUses(MachineInstr *MI, + SmallSet &Defs, + SmallSet &Uses, + const TargetRegisterInfo *TRI) { + SmallVector LocalDefs; + SmallVector LocalUses; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg) + if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP) continue; - if (MO.isDef()) - Defs.insert(Reg); + if (MO.isUse()) + LocalUses.push_back(Reg); else - Uses.insert(Reg); + LocalDefs.push_back(Reg); } + + for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { + unsigned Reg = LocalUses[i]; + Uses.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Uses.insert(*Subreg); + } + + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + Defs.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Defs.insert(*Subreg); + if (Reg == ARM::CPSR) + continue; + } +} + +bool +Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, + ARMCC::CondCodes CC, ARMCC::CondCodes OCC, + SmallSet &Defs, + SmallSet &Uses) { + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { + assert(SrcSubIdx == 0 && DstSubIdx == 0 && + "Sub-register indices still around?"); + // llvm models select's as two-address instructions. That means a copy + // is inserted before a t2MOVccr, etc. If the copy is scheduled in + // between selects we would end up creating multiple IT blocks. + + // First check if it's safe to move it. + if (Uses.count(DstReg) || Defs.count(SrcReg)) + return false; + + // Then peek at the next instruction to see if it's predicated on CC or OCC. + // If not, then there is nothing to be gained by moving the copy. 
+ MachineBasicBlock::iterator I = MI; ++I; + MachineBasicBlock::iterator E = MI->getParent()->end(); + while (I != E && I->isDebugValue()) + ++I; + if (I == E) return false; // Copy ends the block; nothing left to peek at. + unsigned NPredReg = 0; + ARMCC::CondCodes NCC = getPredicate(I, NPredReg); + return NCC == CC || NCC == OCC; + } + return false; } bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { @@ -283,15 +345,21 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { Defs.clear(); Uses.clear(); - TrackDefUses(MI, Defs, Uses); + TrackDefUses(MI, Defs, Uses, TRI); // Insert an IT instruction. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)) .addImm(CC); + + // Add implicit use of ITSTATE to IT block instructions. + MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/, + true/*isImp*/, false/*isKill*/)); + + MachineInstr *LastITMI = MI; MachineBasicBlock::iterator InsertPos = MIB; ++MBBI; - // Finalize IT mask. + // Form IT block. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; // Branches, including tricky ones like LDM_RET, need to end an IT @@ -306,35 +374,36 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NCC == CC || NCC == OCC) + if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; - else { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + // Add implicit use of ITSTATE. + NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/, + true/*isImp*/, false/*isKill*/)); + LastITMI = NMI; + } else { if (NCC == ARMCC::AL && - TII->isMoveInstr(*NMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert(SrcSubIdx == 0 && DstSubIdx == 0 && - "Sub-register indices still around?"); - // llvm models select's as two-address instructions. That means a copy - // is inserted before a t2MOVccr, etc. If the copy is scheduled in - // between selects we would end up creating multiple IT blocks. 
- if (!Uses.count(DstReg) && !Defs.count(SrcReg)) { - --MBBI; - MBB.remove(NMI); - MBB.insert(InsertPos, NMI); - ++NumMovedInsts; - continue; - } + MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) { + --MBBI; + MBB.remove(NMI); + MBB.insert(InsertPos, NMI); + ++NumMovedInsts; + continue; } break; } - TrackDefUses(NMI, Defs, Uses); + TrackDefUses(NMI, Defs, Uses, TRI); --Pos; } + // Finalize IT mask. Mask |= (1 << Pos); // Tag along (firstcond[0] << 4) with the mask. Mask |= (CC & 1) << 4; MIB.addImm(Mask); + + // Last instruction in IT block kills ITSTATE. + LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); + Modified = true; ++NumITs; } @@ -346,6 +415,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); AFI = Fn.getInfo(); TII = static_cast(TM.getInstrInfo()); + TRI = TM.getRegisterInfo(); if (!AFI->isThumbFunction()) return false; @@ -360,6 +430,9 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { Modified |= InsertITInstructions(MBB); } + if (Modified && !PreRegAlloc) + AFI->setHasITBlocks(true); + return Modified; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index f78111f7b92..866ffb19884 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -17,12 +17,13 @@ #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" +#include "Thumb2HazardRecognizer.h" +#include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" -#include "Thumb2InstrInfo.h" using namespace llvm; @@ -35,6 +36,57 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } +void +Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { 
+ MachineBasicBlock *MBB = Tail->getParent(); + ARMFunctionInfo *AFI = MBB->getParent()->getInfo(); + if (!AFI->hasITBlocks()) { + TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + return; + } + + // If the first instruction of Tail is predicated, we may have to update + // the IT instruction. + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg); + MachineBasicBlock::iterator MBBI = Tail; + if (CC != ARMCC::AL) + // Expecting at least the t2IT instruction before it. + --MBBI; + + // Actually replace the tail. + TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + + // Fix up IT. + if (CC != ARMCC::AL) { + MachineBasicBlock::iterator E = MBB->begin(); + unsigned Count = 4; // At most 4 instructions in an IT block. + while (Count && MBBI != E) { + if (MBBI->isDebugValue()) { + --MBBI; + continue; + } + if (MBBI->getOpcode() == ARM::t2IT) { + unsigned Mask = MBBI->getOperand(1).getImm(); + if (Count == 4) + MBBI->eraseFromParent(); + else { + unsigned MaskOn = 1 << Count; + unsigned MaskOff = ~(MaskOn - 1); + MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn); + } + return; + } + --MBBI; + --Count; + } + + // Ctrl flow can reach here if branch folding is run before IT block + // formation pass. 
+ } +} + bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -116,6 +168,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } +ScheduleHazardRecognizer *Thumb2InstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { + return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 3f233c47868..d5fc3591385 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -20,7 +20,8 @@ #include "Thumb2RegisterInfo.h" namespace llvm { - class ARMSubtarget; +class ARMSubtarget; +class ScheduleHazardRecognizer; class Thumb2InstrInfo : public ARMBaseInstrInfo { Thumb2RegisterInfo RI; @@ -31,6 +32,9 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const; + bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, @@ -60,6 +64,9 @@ public: /// always be able to get register info as well (through this method). 
/// const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } + + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const; }; } diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll index d9cac8022b2..7b9d0cf32cf 100644 --- a/test/CodeGen/ARM/ifcvt2.ll +++ b/test/CodeGen/ARM/ifcvt2.ll @@ -1,10 +1,8 @@ -; RUN: llc < %s -march=arm > %t -; RUN: grep bxlt %t | count 1 -; RUN: grep bxgt %t | count 1 -; RUN: not grep bxge %t -; RUN: not grep bxle %t +; RUN: llc < %s -march=arm | FileCheck %s define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK: t1: +; CHECK: bxlt lr %tmp2 = icmp sgt i32 %c, 10 %tmp5 = icmp slt i32 %d, 4 %tmp8 = or i1 %tmp5, %tmp2 @@ -21,6 +19,13 @@ UnifiedReturnBlock: } define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK: t2: +; CHECK: bxgt lr +; CHECK: cmp +; CHECK: addge +; CHECK: subge +; CHECK-NOT: bxge lr +; CHECK: bx lr %tmp2 = icmp sgt i32 %c, 10 %tmp5 = icmp slt i32 %d, 4 %tmp8 = and i1 %tmp5, %tmp2 diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll index 4f4c9af850f..10a4985d173 100644 --- a/test/CodeGen/Thumb2/thumb2-cbnz.ll +++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll @@ -21,7 +21,7 @@ bb7: ; preds = %bb3 bb9: ; preds = %bb7 ; CHECK: cmp r0, #0 -; CHECK-NEXT: cmp r0, #0 +; CHECK: cmp r0, #0 ; CHECK-NEXT: cbnz %0 = tail call double @floor(double %b) nounwind readnone ; [#uses=0] br label %bb11 diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll index 4af492c9308..2c5734881d5 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll @@ -32,6 +32,7 @@ entry: ; CHECK: it eq ; CHECK: cmpeq ; CHECK: bne +; CHECK: cmp ; CHECK: itt eq ; CHECK: moveq ; CHECK: popeq