diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 16fc8a0e372..cec0b8f3c7c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -71,6 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
   const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
   PPCRegInfo = TM.getRegisterInfo();
+  PPCII = TM.getInstrInfo();
 
   setPow2DivIsCheap();
 
@@ -6239,29 +6240,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
   if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                                  MI->getOpcode() == PPC::SELECT_CC_I8)) {
-    unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
-                                         PPC::ISEL8 : PPC::ISEL;
-    unsigned SelectPred = MI->getOperand(4).getImm();
+    SmallVector<MachineOperand, 2> Cond;
+    Cond.push_back(MI->getOperand(4));
+    Cond.push_back(MI->getOperand(1));
+
     DebugLoc dl = MI->getDebugLoc();
-
-    unsigned SubIdx;
-    bool SwapOps;
-    switch (SelectPred) {
-    default: llvm_unreachable("invalid predicate for isel");
-    case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
-    case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
-    case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
-    case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
-    case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
-    case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
-    case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
-    case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
-    }
-
-    BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
-      .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
-      .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
-      .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
+    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
+                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
   } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
              MI->getOpcode() == PPC::SELECT_CC_I8 ||
              MI->getOpcode() == PPC::SELECT_CC_F4 ||
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7157b70d862..423e9839807 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,6 +16,7 @@
 #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
 
 #include "PPC.h"
+#include "PPCInstrInfo.h"
 #include "PPCRegisterInfo.h"
 #include "PPCSubtarget.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -327,6 +328,7 @@ namespace llvm {
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &PPCSubTarget;
     const PPCRegisterInfo *PPCRegInfo;
+    const PPCInstrInfo *PPCII;
 
   public:
     explicit PPCTargetLowering(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 69c54ed084b..b0dc99977c3 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -417,6 +417,115 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return 2;
 }
 
+// Select analysis.
+//
+// Returns true when a select between TrueReg and FalseReg, controlled by Cond,
+// can be emitted as an isel. That requires ISEL support in the subtarget, a
+// two-operand condition whose register is neither CTR nor CTR8, and a common
+// subclass of both registers that lies within GPRC or G8RC. On success the
+// three cycle estimates are each set to 1.
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+                const SmallVectorImpl<MachineOperand> &Cond,
+                unsigned TrueReg, unsigned FalseReg,
+                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+  if (!TM.getSubtargetImpl()->hasISEL())
+    return false;
+
+  if (Cond.size() != 2)
+    return false;
+
+  // If this is really a bdnz-like condition, then it cannot be turned into a
+  // select.
+  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+    return false;
+
+  // Check register classes.
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  if (!RC)
+    return false;
+
+  // isel is for regular integer GPRs only.
+  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
+      !PPC::G8RCRegClass.hasSubClassEq(RC))
+    return false;
+
+  // FIXME: These numbers are for the A2, how well they work for other cores is
+  // an open question. On the A2, the isel instruction has a 2-cycle latency
+  // but single-cycle throughput. These numbers are used in combination with
+  // the MispredictPenalty setting from the active SchedMachineModel.
+  CondCycles = 1;
+  TrueCycles = 1;
+  FalseCycles = 1;
+
+  return true;
+}
+
+// Emits an isel before MI that defines DestReg: ISEL when the common register
+// class is within GPRC, ISEL8 otherwise. Cond[0] is the predicate immediate
+// and Cond[1] the register whose eq/lt/gt/un subregister is tested; the
+// NE/GE/LE/NU predicates are handled by swapping the two inputs.
+void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI, DebugLoc dl,
+                                unsigned DestReg,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                unsigned TrueReg, unsigned FalseReg) const {
+  assert(Cond.size() == 2 &&
+         "PPC branch conditions have two components!");
+
+  assert(TM.getSubtargetImpl()->hasISEL() &&
+         "Cannot insert select on target without ISEL support");
+
+  // Get the register classes.
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  assert(RC && "TrueReg and FalseReg must have overlapping register classes");
+  assert((PPC::GPRCRegClass.hasSubClassEq(RC) ||
+          PPC::G8RCRegClass.hasSubClassEq(RC)) &&
+         "isel is for regular integer GPRs only");
+
+  unsigned OpCode =
+    PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8;
+  unsigned SelectPred = Cond[0].getImm();
+
+  unsigned SubIdx;
+  bool SwapOps;
+  switch (SelectPred) {
+  default: llvm_unreachable("invalid predicate for isel");
+  case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+  case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+  case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+  case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+  case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+  case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+  case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+  case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+  }
+
+  unsigned FirstReg =  SwapOps ? FalseReg : TrueReg,
+           SecondReg = SwapOps ? TrueReg  : FalseReg;
+
+  // The first input register of isel cannot be r0. If it is a member
+  // of a register class that can be r0, then copy it first (the
+  // register allocator should eliminate the copy).
+  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
+      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
+    const TargetRegisterClass *FirstRC =
+      MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
+        &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
+    unsigned OldFirstReg = FirstReg;
+    FirstReg = MRI.createVirtualRegister(FirstRC);
+    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
+      .addReg(OldFirstReg);
+  }
+
+  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
+    .addReg(FirstReg).addReg(SecondReg)
+    .addReg(Cond[1].getReg(), 0, SubIdx);
+}
+
 void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I, DebugLoc DL,
                                unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 635e3480b06..b851216ebce 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -120,6 +120,17 @@ public:
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const;
+
+  // Select analysis.
+  virtual bool canInsertSelect(const MachineBasicBlock&,
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               unsigned, unsigned, int&, int&, int&) const;
+  virtual void insertSelect(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI, DebugLoc DL,
+                            unsigned DstReg,
+                            const SmallVectorImpl<MachineOperand> &Cond,
+                            unsigned TrueReg, unsigned FalseReg) const;
+
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index fe851c1b6fb..59c82f6cc99 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -86,7 +86,12 @@ public:
     return getTM<PPCTargetMachine>();
   }
 
+  const PPCSubtarget &getPPCSubtarget() const {
+    return *getPPCTargetMachine().getSubtargetImpl();
+  }
+
   virtual bool addPreRegAlloc();
+  virtual bool addILPOpts();
   virtual bool addInstSelector();
   virtual bool addPreEmitPass();
 };
@@ -103,6 +108,15 @@ bool PPCPassConfig::addPreRegAlloc() {
   return false;
 }
 
+bool PPCPassConfig::addILPOpts() {
+  if (getPPCSubtarget().hasISEL()) {
+    addPass(&EarlyIfConverterID);
+    return true;
+  }
+
+  return false;
+}
+
 bool PPCPassConfig::addInstSelector() {
   // Install an instruction selector.
   addPass(createPPCISelDag(getPPCTargetMachine()));
diff --git a/test/CodeGen/PowerPC/ifcvt.ll b/test/CodeGen/PowerPC/ifcvt.ll
new file mode 100644
index 00000000000..9c966c95b72
--- /dev/null
+++ b/test/CodeGen/PowerPC/ifcvt.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  %sext82 = shl i32 %d, 16
+  %conv29 = ashr exact i32 %sext82, 16
+  %cmp = icmp slt i32 %sext82, 0
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %and33 = and i32 %conv29, 32767
+  %sub34 = sub nsw i32 %a, %and33
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %add37 = add nsw i32 %conv29, %a
+  br label %cond.end
+
+; CHECK: @test
+; CHECK: add [[REG:[0-9]+]],
+; CHECK: subf [[REG2:[0-9]+]],
+; CHECK: isel {{[0-9]+}}, [[REG]], [[REG2]],
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %sub34, %cond.true ], [ %add37, %cond.false ]
+  %sext83 = shl i32 %cond, 16
+  %conv39 = ashr exact i32 %sext83, 16
+  %add41 = sub i32 %b, %a
+  %sub43 = add i32 %add41, %conv39
+  ret i32 %sub43
+}
+