[PowerPC] More fast-isel chunks (returns and integer extends)

Incremental improvement to fast-isel for PPC64.  This allows us to
select on ret, sext, and zext.  Filling in sext/zext improves some of
the existing logic in handling compare-immediates that needed extends.

A simplified return convention for fast-isel is also added to the
PPC64 calling conventions.  All call/return processing for DAG
selection is handled with custom code, so there isn't an existing CC
to rely on here.  The include of PPCGenCallingConv.inc causes compiler
warnings due to the 32-bit calling conventions that are not used, so
the dummy function "usePPC32CCs()" is added here to silence those.

Test cases for the return and extend logic are added.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189266 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2013-08-26 19:42:51 +00:00
parent 551023c1e4
commit 055d207426
5 changed files with 472 additions and 3 deletions

View File

@ -37,6 +37,26 @@ def RetCC_PPC : CallingConv<[
]>;
// Note that we don't currently have calling conventions for 64-bit
// PowerPC, but handle all the complexities of the ABI in the lowering
// logic. FIXME: See if the logic can be simplified with use of CCs.
// This may require some extensions to current table generation.
// Simple return-value convention for 64-bit ELF PowerPC fast isel.
// All small ints are promoted to i64. Vector types, quadword ints,
// and multiple register returns are "supported" to avoid compile
// errors, but none are handled by the fast selector.
def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
CCIfType<[i32], CCPromoteToType<i64>>,
CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
]>;
//===----------------------------------------------------------------------===//
// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//

View File

@ -95,6 +95,8 @@ class PPCFastISel : public FastISel {
private:
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
@ -109,6 +111,10 @@ class PPCFastISel : public FastISel {
unsigned PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC);
// Call handling routines.
private:
CCAssignFn *usePPC32CCs(unsigned Flag);
private:
#include "PPCGenFastISel.inc"
@ -116,6 +122,21 @@ class PPCFastISel : public FastISel {
} // end anonymous namespace
#include "PPCGenCallingConv.inc"
// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
if (Flag == 1)
return CC_PPC32_SVR4;
else if (Flag == 2)
return CC_PPC32_SVR4_ByVal;
else if (Flag == 3)
return CC_PPC32_SVR4_VarArg;
else
return RetCC_PPC;
}
static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
switch (Pred) {
// These are not representable with any single compare.
@ -309,13 +330,164 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
return true;
}
// Attempt to fast-select a return instruction.
bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
const Value *RV = Ret->getOperand(0);
// FIXME: Only one output register for now.
if (ValLocs.size() > 1)
return false;
// Special case for returning a constant integer of any size.
// Materialize the constant as an i64 and copy it to the return
// register. This avoids an unnecessary extend or truncate.
if (isa<ConstantInt>(*RV)) {
const Constant *C = cast<Constant>(RV);
unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
unsigned RetReg = ValLocs[0].getLocReg();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
RetReg).addReg(SrcReg);
RetRegs.push_back(RetReg);
} else {
unsigned Reg = getRegForValue(RV);
if (Reg == 0)
return false;
// Copy the result values into the output registers.
for (unsigned i = 0; i < ValLocs.size(); ++i) {
CCValAssign &VA = ValLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
RetRegs.push_back(VA.getLocReg());
unsigned SrcReg = Reg + VA.getValNo();
EVT RVEVT = TLI.getValueType(RV->getType());
if (!RVEVT.isSimple())
return false;
MVT RVVT = RVEVT.getSimpleVT();
MVT DestVT = VA.getLocVT();
if (RVVT != DestVT && RVVT != MVT::i8 &&
RVVT != MVT::i16 && RVVT != MVT::i32)
return false;
if (RVVT != DestVT) {
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
llvm_unreachable("Full value assign but types don't match?");
case CCValAssign::AExt:
case CCValAssign::ZExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
return false;
SrcReg = TmpReg;
break;
}
case CCValAssign::SExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
return false;
SrcReg = TmpReg;
break;
}
}
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::COPY), RetRegs[i])
.addReg(SrcReg);
}
}
}
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(PPC::BLR));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it. Not yet implemented.
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg, bool IsZExt) {
return (SrcVT == MVT::i8 && SrcReg && DestVT == MVT::i8 && DestReg
&& IsZExt && false);
if (DestVT != MVT::i32 && DestVT != MVT::i64)
return false;
if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
return false;
// Signed extensions use EXTSB, EXTSH, EXTSW.
if (!IsZExt) {
unsigned Opc;
if (SrcVT == MVT::i8)
Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
else if (SrcVT == MVT::i16)
Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
else {
assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
Opc = PPC::EXTSW_32_64;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Unsigned 32-bit extensions use RLWINM.
} else if (DestVT == MVT::i32) {
unsigned MB;
if (SrcVT == MVT::i8)
MB = 24;
else {
assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
MB = 16;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
// Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
} else {
unsigned MB;
if (SrcVT == MVT::i8)
MB = 56;
else if (SrcVT == MVT::i16)
MB = 48;
else
MB = 32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(PPC::RLDICL_32_64), DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
}
return true;
}
// Attempt to fast-select an indirect branch instruction.
@ -335,6 +507,45 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
return true;
}
// Attempt to fast-select an integer extend instruction.
bool PPCFastISel::SelectIntExt(const Instruction *I) {
Type *DestTy = I->getType();
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
bool IsZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
SrcEVT = TLI.getValueType(SrcTy, true);
DestEVT = TLI.getValueType(DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
// If we know the register class needed for the result of this
// instruction, use it. Otherwise pick the register class of the
// correct size that does not contain X0/R0, since we don't know
// whether downstream uses permit that assignment.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
(AssignedReg ? MRI.getRegClass(AssignedReg) :
(DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass));
unsigned ResultReg = createResultReg(RC);
if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
return false;
UpdateValueMap(I, ResultReg);
return true;
}
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
@ -344,6 +555,11 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
return SelectBranch(I);
case Instruction::IndirectBr:
return SelectIndirectBr(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
// Here add other flavors of Instruction::XXX that automated
// cases don't catch. For example, switches are terminators
// that aren't yet handled.

View File

@ -506,6 +506,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
[(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
} // Interpretation64Bit
// For fast-isel:
let isCodeGenOnly = 1 in {
def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
"extsb $rA, $rS", IntSimple, []>, isPPC64;
def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
"extsh $rA, $rS", IntSimple, []>, isPPC64;
} // isCodeGenOnly for fast-isel
defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
"extsw", "$rA, $rS", IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
@ -569,6 +577,14 @@ defm RLDICL : MDForm_1r<30, 0,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
// For fast-isel:
let isCodeGenOnly = 1 in
def RLDICL_32_64 : MDForm_1<30, 0,
(outs g8rc:$rA),
(ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
// End fast-isel.
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,

View File

@ -0,0 +1,75 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; zext
define i32 @zext_8_32(i8 %a) nounwind ssp {
; ELF64: zext_8_32
%r = zext i8 %a to i32
; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
ret i32 %r
}
define i32 @zext_16_32(i16 %a) nounwind ssp {
; ELF64: zext_16_32
%r = zext i16 %a to i32
; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
ret i32 %r
}
define i64 @zext_8_64(i8 %a) nounwind ssp {
; ELF64: zext_8_64
%r = zext i8 %a to i64
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
ret i64 %r
}
define i64 @zext_16_64(i16 %a) nounwind ssp {
; ELF64: zext_16_64
%r = zext i16 %a to i64
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
ret i64 %r
}
define i64 @zext_32_64(i32 %a) nounwind ssp {
; ELF64: zext_32_64
%r = zext i32 %a to i64
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
ret i64 %r
}
; sext
define i32 @sext_8_32(i8 %a) nounwind ssp {
; ELF64: sext_8_32
%r = sext i8 %a to i32
; ELF64: extsb
ret i32 %r
}
define i32 @sext_16_32(i16 %a) nounwind ssp {
; ELF64: sext_16_32
%r = sext i16 %a to i32
; ELF64: extsh
ret i32 %r
}
define i64 @sext_8_64(i8 %a) nounwind ssp {
; ELF64: sext_8_64
%r = sext i8 %a to i64
; ELF64: extsb
ret i64 %r
}
define i64 @sext_16_64(i16 %a) nounwind ssp {
; ELF64: sext_16_64
%r = sext i16 %a to i64
; ELF64: extsh
ret i64 %r
}
define i64 @sext_32_64(i32 %a) nounwind ssp {
; ELF64: sext_32_64
%r = sext i32 %a to i64
; ELF64: extsw
ret i64 %r
}

View File

@ -0,0 +1,142 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
entry:
; ELF64: ret2
; ELF64: extsb
; ELF64: blr
ret i8 %a
}
define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
entry:
; ELF64: ret3
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
; ELF64: blr
ret i8 %a
}
define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
entry:
; ELF64: ret4
; ELF64: extsh
; ELF64: blr
ret i16 %a
}
define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
entry:
; ELF64: ret5
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: blr
ret i16 %a
}
define i16 @ret6(i16 %a) nounwind uwtable ssp {
entry:
; ELF64: ret6
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: blr
ret i16 %a
}
define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
entry:
; ELF64: ret7
; ELF64: extsw
; ELF64: blr
ret i32 %a
}
define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
entry:
; ELF64: ret8
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
; ELF64: blr
ret i32 %a
}
define i32 @ret9(i32 %a) nounwind uwtable ssp {
entry:
; ELF64: ret9
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
; ELF64: blr
ret i32 %a
}
define i64 @ret10(i64 %a) nounwind uwtable ssp {
entry:
; ELF64: ret10
; ELF64-NOT: exts
; ELF64-NOT: rldicl
; ELF64: blr
ret i64 %a
}
define float @ret11(float %a) nounwind uwtable ssp {
entry:
; ELF64: ret11
; ELF64: blr
ret float %a
}
define double @ret12(double %a) nounwind uwtable ssp {
entry:
; ELF64: ret12
; ELF64: blr
ret double %a
}
define i8 @ret13() nounwind uwtable ssp {
entry:
; ELF64: ret13
; ELF64: li
; ELF64: blr
ret i8 15;
}
define i16 @ret14() nounwind uwtable ssp {
entry:
; ELF64: ret14
; ELF64: li
; ELF64: blr
ret i16 -225;
}
define i32 @ret15() nounwind uwtable ssp {
entry:
; ELF64: ret15
; ELF64: lis
; ELF64: ori
; ELF64: blr
ret i32 278135;
}
define i64 @ret16() nounwind uwtable ssp {
entry:
; ELF64: ret16
; ELF64: li
; ELF64: sldi
; ELF64: oris
; ELF64: ori
; ELF64: blr
ret i64 27813515225;
}
define float @ret17() nounwind uwtable ssp {
entry:
; ELF64: ret17
; ELF64: addis
; ELF64: lfs
; ELF64: blr
ret float 2.5;
}
define double @ret18() nounwind uwtable ssp {
entry:
; ELF64: ret18
; ELF64: addis
; ELF64: lfd
; ELF64: blr
ret double 2.5e-33;
}