diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 041302b399b..ca49d95d2b6 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2295,6 +2295,37 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } } +/// getCMovFromCond - Return a cmov(rr) opcode for the given condition and +/// register size in bytes. +static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes) { + static const unsigned Opc[16][3] = { + { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, + { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, + { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, + { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr }, + { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr }, + { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr }, + { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr }, + { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr }, + { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr }, + { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr }, + { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr }, + { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr }, + { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr }, + { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr }, + { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr }, + { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr } + }; + + assert(CC < 16 && "Can only handle standard cond codes"); + switch(RegBytes) { + default: llvm_unreachable("Illegal register size!"); + case 2: return Opc[CC][0]; + case 4: return Opc[CC][1]; + case 8: return Opc[CC][2]; + } +} + bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { if (!MI->isTerminator()) return false; @@ -2519,6 +2550,55 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } +bool X86InstrInfo:: +canInsertSelect(const MachineBasicBlock &MBB, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, int &FalseCycles) const { + // Not all subtargets have cmov instructions. + if (!TM.getSubtarget().hasCMov()) + return false; + if (Cond.size() != 1) + return false; + // We cannot do the composite conditions, at least not in SSA form. + if ((X86::CondCode)Cond[0].getImm() > X86::COND_S) + return false; + + // Check register classes. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + if (!RC) + return false; + + // We have cmov instructions for 16, 32, and 64 bit general purpose registers. + if (X86::GR16RegClass.hasSubClassEq(RC) || + X86::GR32RegClass.hasSubClassEq(RC) || + X86::GR64RegClass.hasSubClassEq(RC)) { + // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy + // Bridge. Probably Ivy Bridge as well. + CondCycles = 2; + TrueCycles = 2; + FalseCycles = 2; + return true; + } + + // Can't do vectors. + return false; +} + +void X86InstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + assert(Cond.size() == 1 && "Invalid Cond array"); + unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), + MRI.getRegClass(DstReg)->getSize()); + BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); +} + /// isHReg - Test if the given register is a physical h register. static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 4006dad6847..20096684063 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -219,6 +219,14 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const; + virtual bool canInsertSelect(const MachineBasicBlock&, + const SmallVectorImpl &Cond, + unsigned, unsigned, int&, int&, int&) const; + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c66a5542946..b7ba568394b 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -140,7 +140,12 @@ public: } // namespace TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { - return new X86PassConfig(this, PM); + X86PassConfig *PC = new X86PassConfig(this, PM); + + if (Subtarget.hasCMov()) + PC->enablePass(&EarlyIfConverterID); + + return PC; } bool X86PassConfig::addInstSelector() { diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll new file mode 100644 index 00000000000..ce1fe04f980 --- /dev/null +++ b/test/CodeGen/X86/early-ifcvt.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -enable-early-ifcvt -stress-early-ifcvt | FileCheck %s +target triple = "x86_64-apple-macosx10.8.0" + +; CHECK: mm2 +define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp { +entry: + br label %do.body + +; CHECK: do.body +; Loop body has no branches before the backedge. +; CHECK-NOT: LBB +do.body: + %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ] + %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ] + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ] + %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ] + %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1 + %0 = load i32* %p.addr.0, align 4 + %cmp = icmp sgt i32 %0, %max.0 + br i1 %cmp, label %do.cond, label %if.else + +if.else: + %cmp1 = icmp slt i32 %0, %min.0 + %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0 + br label %do.cond + +do.cond: + %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ] + %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ] +; CHECK: decl %esi +; CHECK: jne LBB + %dec = add i32 %n.addr.0, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: + %sub = sub nsw i32 %max.1, %min.1 + ret i32 %sub +}