[PowerPC] Add fast-isel branch and compare selection.

First chunk of actual fast-isel selection code.  This handles direct
and indirect branches, as well as the compares that feed conditional
branches.  PPCFastISel::PPCEmitIntExt() is just roughed in and will be
expanded in a future patch.  This also corrects a problem with
selection of constant pool entries in JIT mode or with the small code
model.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189202 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2013-08-25 22:33:42 +00:00
parent a4959f3f6e
commit 3fad2bcd25
3 changed files with 330 additions and 9 deletions

View File

@ -89,9 +89,19 @@ class PPCFastISel : public FastISel {
virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
virtual bool FastLowerArguments();
virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
// Instruction selection routines.
private:
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
// Utility routines.
private:
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg);
bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeInt(const Constant *C, MVT VT);
unsigned PPCMaterialize32BitInt(int64_t Imm,
@ -106,10 +116,241 @@ class PPCFastISel : public FastISel {
} // end anonymous namespace
// Map an IR comparison predicate onto the single PowerPC condition-register
// test that implements it.  Returns an empty Optional for predicates that
// cannot be expressed with one compare (FCMP_FALSE/FCMP_TRUE and the
// unordered-or-relational floating-point forms).
static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
  case CmpInst::FCMP_OEQ:
  case CmpInst::ICMP_EQ:
    return PPC::PRED_EQ;

  case CmpInst::FCMP_OGT:
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_SGT:
    return PPC::PRED_GT;

  case CmpInst::FCMP_OGE:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_SGE:
    return PPC::PRED_GE;

  case CmpInst::FCMP_OLT:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_SLT:
    return PPC::PRED_LT;

  case CmpInst::FCMP_OLE:
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_SLE:
    return PPC::PRED_LE;

  case CmpInst::FCMP_ONE:
  case CmpInst::ICMP_NE:
    return PPC::PRED_NE;

  // Ordered/unordered tests map onto the CR "unordered" bit.
  case CmpInst::FCMP_ORD:
    return PPC::PRED_NU;
  case CmpInst::FCMP_UNO:
    return PPC::PRED_UN;

  // Everything else (FCMP_FALSE, FCMP_TRUE, FCMP_UEQ, FCMP_UGT, FCMP_UGE,
  // FCMP_ULT, FCMP_ULE, FCMP_UNE) is not representable with a single compare.
  default:
    return Optional<PPC::Predicate>();
  }
}
// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *BrBB = FuncInfo.MBB;
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// For now, just try the simplest case where it's fed by a compare.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
if (!OptPPCPred)
return false;
PPC::Predicate PPCPred = OptPPCPred.getValue();
// Take advantage of fall-through opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
PPCPred = PPC::InvertPredicate(PPCPred);
}
unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
CondReg))
return false;
BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
FastEmitBranch(FBB, DL);
FuncInfo.MBB->addSuccessor(TBB);
return true;
} else if (const ConstantInt *CI =
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
FastEmitBranch(Target, DL);
return true;
}
// FIXME: ARM looks for a case where the block containing the compare
// has been split from the block containing the branch. If this happens,
// there is a vreg available containing the result of the compare. I'm
// not sure we can do much, as we've lost the predicate information with
// the compare instruction -- we have a 4-bit CR but don't know which bit
// to test here.
return false;
}
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
bool IsZExt, unsigned DestReg) {
Type *Ty = SrcValue1->getType();
EVT SrcEVT = TLI.getValueType(Ty, true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
// See if operand 2 is an immediate encodeable in the compare.
// FIXME: Operands are not in canonical order at -O0, so an immediate
// operand in position 1 is a lost opportunity for now. We are
// similar to ARM in this regard.
long Imm = 0;
bool UseImm = false;
// Only 16-bit integer constants can be represented in compares for
// PowerPC. Others will be materialized into a register.
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
SrcVT == MVT::i8 || SrcVT == MVT::i1) {
const APInt &CIVal = ConstInt->getValue();
Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
UseImm = true;
}
}
unsigned CmpOpc;
bool NeedsExt = false;
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
CmpOpc = PPC::FCMPUS;
break;
case MVT::f64:
CmpOpc = PPC::FCMPUD;
break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
NeedsExt = true;
// Intentional fall-through.
case MVT::i32:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
else
CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
break;
case MVT::i64:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
else
CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
break;
}
unsigned SrcReg1 = getRegForValue(SrcValue1);
if (SrcReg1 == 0)
return false;
unsigned SrcReg2 = 0;
if (!UseImm) {
SrcReg2 = getRegForValue(SrcValue2);
if (SrcReg2 == 0)
return false;
}
if (NeedsExt) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg1 = ExtReg;
if (!UseImm) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg2 = ExtReg;
}
}
if (!UseImm)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addReg(SrcReg2);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addImm(Imm);
return true;
}
// Attempt to emit an integer extend of SrcReg into DestReg.  Both
// signed and zero extensions are supported.  Return false if we
// can't handle it.  Not yet implemented.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                unsigned DestReg, bool IsZExt) {
  // Stub: reference each parameter to silence unused-parameter warnings,
  // and report failure until real extension support lands.
  (void)SrcVT;
  (void)SrcReg;
  (void)DestVT;
  (void)DestReg;
  (void)IsZExt;
  return false;
}
// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0)
    return false;

  // Move the target address into CTR and branch through it.  (This
  // fast-isel only targets 64-bit ELF, hence the 8-byte forms.)
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
    .addReg(AddrReg);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));

  // Every listed destination is a potential CFG successor.
  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (unsigned Idx = 0, NumSucc = IB->getNumSuccessors(); Idx != NumSucc;
       ++Idx)
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(Idx)]);

  return true;
}
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
  // NOTE: the stale 'return I && false;' guard (and its duplicated "TBD"
  // comment line) left over from the previous stub body made this switch
  // unreachable; it has been removed so the selectors below actually run.
  switch (I->getOpcode()) {
  case Instruction::Br:
    return SelectBranch(I);
  case Instruction::IndirectBr:
    return SelectIndirectBr(I);
  // Here add other flavors of Instruction::XXX that automated
  // cases don't catch.  For example, switches are terminators
  // that aren't yet handled.
  default:
    break;
  }
  return false;
}
// Materialize a floating-point constant into a register, and return
@ -131,15 +372,18 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
(VT == MVT::f32) ? 4 : 8, Align);
// For small code model, generate a LDtocCPT.
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
DestReg)
.addConstantPoolIndex(Idx).addReg(PPC::X2).addMemOperand(MMO);
else {
TmpReg)
.addConstantPoolIndex(Idx).addReg(PPC::X2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg).addMemOperand(MMO);
} else {
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
@ -312,6 +556,25 @@ bool PPCFastISel::FastLowerArguments() {
return false;
}
// Handle materializing integer constants into a register.  This is not
// automatically generated for PowerPC, so must be explicitly created here.
// Returns 0 when the opcode/type combination isn't a materializable
// integer constant.
unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
  if (Opc != ISD::Constant)
    return 0;

  // 64-bit constants live in G8RC; everything down to i1 is built as a
  // 32-bit value in GPRC.  Other types are not handled.
  switch (VT.SimpleTy) {
  case MVT::i64:
    return PPCMaterialize64BitInt(Imm, &PPC::G8RCRegClass);
  case MVT::i32:
  case MVT::i16:
  case MVT::i8:
  case MVT::i1:
    return PPCMaterialize32BitInt(Imm, &PPC::GPRCRegClass);
  default:
    return 0;
  }
}
namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,

View File

@ -0,0 +1,43 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; Exercise fast-isel of conditional branches whose condition is a constant:
; a constant-true branch should become a fall-through (no emitted branch to
; the dead false label), and a constant-false branch an unconditional branch
; to its false successor.
define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
entry:
; ELF64: t1
%x = add i32 %a, %b
; br i1 1 always takes %if.then, so no branch to LBB0_1 should be emitted.
br i1 1, label %if.then, label %if.else
; ELF64-NOT: b {{\.?}}LBB0_1
if.then: ; preds = %entry
call void @foo1()
br label %if.end7
if.else: ; preds = %entry
; br i1 0 always takes %if.else3, so an unconditional branch is expected.
br i1 0, label %if.then2, label %if.else3
; ELF64: b {{\.?}}LBB0_4
if.then2: ; preds = %if.else
call void @foo2()
br label %if.end6
if.else3: ; preds = %if.else
%y = sub i32 %a, %b
; Another constant-true branch: again no branch to the dead label.
br i1 1, label %if.then5, label %if.end
; ELF64-NOT: b {{\.?}}LBB0_5
if.then5: ; preds = %if.else3
call void @foo1()
br label %if.end
if.end: ; preds = %if.then5, %if.else3
br label %if.end6
if.end6: ; preds = %if.end, %if.then2
br label %if.end7
if.end7: ; preds = %if.end6, %if.then
ret i32 0
}
declare void @foo1()
declare void @foo2()

View File

@ -0,0 +1,15 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; Exercise fast-isel of indirectbr: the target address (first argument,
; arriving in r3 per the CHECK lines below) should be moved to the count
; register and branched through via mtctr/bctr.
define void @t1(i8* %x) {
entry:
; ELF64: t1
br label %L0
L0:
br label %L1
L1:
indirectbr i8* %x, [ label %L0, label %L1 ]
; ELF64: mtctr 3
; ELF64: bctr
}