[PowerPC] Call support for fast-isel.

This patch adds fast-isel support for calls (but not intrinsic calls
or varargs calls). It also removes a badly-formed assert. There are
new tests for calls themselves, and for folding loads into call
arguments to avoid extra extends.
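
As a rough illustration (a minimal sketch with made-up function names,
not taken from the new tests), this is the shape of direct, non-vararg
call the fast path now selects instead of punting to SelectionDAG:

define i64 @caller(i64 %x) nounwind {
entry:
  %r = call i64 @callee(i64 %x)
  ret i64 %r
}
declare i64 @callee(i64)

At -O0 the argument is copied into X3, the call is emitted as BL8_NOP,
and the return value is copied back out of X3, all without leaving
fast-isel.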


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189701 91177308-0d34-0410-b5e6-96231b3b80d8
Bill Schmidt  2013-08-30 22:18:55 +00:00
commit 11addd2a2f (parent 06f9db01ba)
5 changed files with 504 additions and 3 deletions

@@ -42,6 +42,17 @@ def RetCC_PPC : CallingConv<[
// logic. FIXME: See if the logic can be simplified with use of CCs.
// This may require some extensions to current table generation.
// Simple calling convention for 64-bit ELF PowerPC fast isel.
// Only handle ints and floats. All ints are promoted to i64.
// Vector types and quadword ints are not handled.
def CC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
CCIfType<[i32], CCPromoteToType<i64>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>
]>;
// Simple return-value convention for 64-bit ELF PowerPC fast isel.
// All small ints are promoted to i64. Vector types, quadword ints,
// and multiple register returns are "supported" to avoid compile

@@ -114,6 +114,7 @@ class PPCFastISel : public FastISel {
bool SelectIToFP(const Instruction *I, bool IsSigned);
bool SelectFPToI(const Instruction *I, bool IsSigned);
bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
bool SelectCall(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectIntExt(const Instruction *I);
@@ -145,6 +146,17 @@ class PPCFastISel : public FastISel {
// Call handling routines.
private:
bool processCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg);
void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg);
CCAssignFn *usePPC32CCs(unsigned Flag);
private:
@@ -1150,6 +1162,316 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
return true;
}
// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
SmallVectorImpl<unsigned> &RegArgs,
CallingConv::ID CC,
unsigned &NumBytes,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
// Bail out if we can't handle any of the arguments.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
MVT ArgVT = ArgVTs[VA.getValNo()];
// Skip vector arguments for now, as well as long double and
// uint128_t, and anything that isn't passed in a register.
if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
!VA.isRegLoc() || VA.needsCustom())
return false;
// Skip bit-converted arguments for now.
if (VA.getLocInfo() == CCValAssign::BCvt)
return false;
}
// Get a count of how many bytes are to be pushed onto the stack.
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TII.getCallFrameSetupOpcode()))
.addImm(NumBytes);
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
// register.
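// For example (illustrative, not from the tests): for a call such as
// foo(double d, i64 n), d is copied into F1 but still consumes the X3
// slot, so n is passed in X4 rather than X3.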
unsigned NextGPR = PPC::X3;
unsigned NextFPR = PPC::F1;
// Process arguments.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
CCValAssign &VA = ArgLocs[I];
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
// Handle argument promotion and bitcasts.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
llvm_unreachable("Failed to emit a sext!");
ArgVT = DestVT;
Arg = TmpReg;
break;
}
case CCValAssign::AExt:
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
unsigned TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
llvm_unreachable("Failed to emit a zext!");
ArgVT = DestVT;
Arg = TmpReg;
break;
}
case CCValAssign::BCvt: {
// FIXME: Not yet handled.
llvm_unreachable("Should have bailed before getting here!");
break;
}
}
// Copy this argument to the appropriate register.
unsigned ArgReg;
if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
ArgReg = NextFPR++;
++NextGPR;
} else
ArgReg = NextGPR++;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ArgReg).addReg(Arg);
RegArgs.push_back(ArgReg);
}
return true;
}
// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg) {
// Issue CALLSEQ_END.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TII.getCallFrameDestroyOpcode()))
.addImm(NumBytes).addImm(0);
// Next, generate a copy to obtain the return value.
// FIXME: No multi-register return values yet, though I don't foresee
// any real difficulties there.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
CCValAssign &VA = RVLocs[0];
assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
assert(VA.isRegLoc() && "Can only return in registers!");
MVT DestVT = VA.getValVT();
MVT CopyVT = DestVT;
// Ints smaller than a register still arrive in a full 64-bit
// register, so make sure we recognize this.
if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
CopyVT = MVT::i64;
unsigned SourcePhysReg = VA.getLocReg();
unsigned ResultReg;
if (RetVT == CopyVT) {
const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
ResultReg = createResultReg(CpyRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
// If necessary, round the floating result to single precision.
} else if (CopyVT == MVT::f64) {
ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
ResultReg).addReg(SourcePhysReg);
// If only the low half of a general register is needed, generate
// a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
// used along the fast-isel path (not lowered), and downstream logic
// also doesn't like a direct subreg copy on a physical reg.)
} else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
ResultReg = createResultReg(&PPC::GPRCRegClass);
// Convert physical register from G8RC to GPRC.
SourcePhysReg -= PPC::X0 - PPC::R0;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
}
UsedRegs.push_back(SourcePhysReg);
UpdateValueMap(I, ResultReg);
}
}
// Attempt to fast-select a call instruction.
bool PPCFastISel::SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
// Can't handle inline asm.
if (isa<InlineAsm>(Callee))
return false;
// Allow SelectionDAG isel to handle tail calls.
if (CI->isTailCall())
return false;
// Obtain calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
bool IsVarArg = FTy->isVarArg();
// Not ready for varargs yet.
if (IsVarArg)
return false;
// Handle simple calls for now, with legal return types and
// those that can be extended.
Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
RetVT != MVT::f64) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
if (RVLocs.size() > 1)
return false;
}
// Bail early if more than 8 arguments, as we only currently
// handle arguments passed in registers.
unsigned NumArgs = CS.arg_size();
if (NumArgs > 8)
return false;
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
SmallVector<unsigned, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(NumArgs);
ArgRegs.reserve(NumArgs);
ArgVTs.reserve(NumArgs);
ArgFlags.reserve(NumArgs);
for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
II != IE; ++II) {
// FIXME: ARM does something for intrinsic calls here, check into that.
unsigned AttrIdx = II - CS.arg_begin() + 1;
// Only handle easy calls for now. It would be reasonably easy
// to handle <= 8-byte structures passed ByVal in registers, but we
// have to ensure they are right-justified in the register.
if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
CS.paramHasAttr(AttrIdx, Attribute::ByVal))
return false;
ISD::ArgFlagsTy Flags;
if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
Flags.setSExt();
if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
Flags.setZExt();
Type *ArgTy = (*II)->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
return false;
if (ArgVT.isVector())
return false;
unsigned Arg = getRegForValue(*II);
if (Arg == 0)
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
Args.push_back(*II);
ArgRegs.push_back(Arg);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
}
// Process the arguments.
SmallVector<unsigned, 8> RegArgs;
unsigned NumBytes;
if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
RegArgs, CC, NumBytes, IsVarArg))
return false;
// FIXME: No handling for function pointers yet. This requires
// implementing the function descriptor (OPD) setup.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
if (!GV)
return false;
// Build direct call with NOP for TOC restore.
// FIXME: We can and should optimize away the NOP for local calls.
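// (BL8_NOP expands to "bl <callee>" followed by a nop; for calls that
// cross a TOC boundary the linker rewrites the nop into the TOC restore,
// typically "ld 2, 40(1)" under the 64-bit ELFv1 ABI.)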
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(PPC::BL8_NOP));
// Add callee.
MIB.addGlobalAddress(GV);
// Add implicit physical register uses to the call.
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
// Set all unused physregs defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
return true;
}
// Attempt to fast-select a return instruction.
bool PPCFastISel::SelectRet(const Instruction *I) {
@@ -1414,6 +1736,10 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
return SelectBinaryIntOp(I, ISD::OR);
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::Call:
if (dyn_cast<IntrinsicInst>(I))
return false;
return SelectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::ZExt:
@@ -1490,7 +1816,6 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// If GV is an alias, use the aliasee for determining thread-locality.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
assert((GVar || isa<Function>(GV)) && "Unexpected GV subclass!");
}
// FIXME: We don't yet handle the complexity of TLS.

@@ -1813,8 +1813,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
/* One of these will be CC_PPC64_ELF_FIS in a future patch. */
return Flag ? RetCC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
}
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,

@@ -0,0 +1,132 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
define i32 @t1(i8 signext %a) nounwind {
%1 = sext i8 %a to i32
ret i32 %1
}
define i32 @t2(i8 zeroext %a) nounwind {
%1 = zext i8 %a to i32
ret i32 %1
}
define i32 @t3(i16 signext %a) nounwind {
%1 = sext i16 %a to i32
ret i32 %1
}
define i32 @t4(i16 zeroext %a) nounwind {
%1 = zext i16 %a to i32
ret i32 %1
}
define void @foo(i8 %a, i16 %b) nounwind {
; ELF64: foo
%1 = call i32 @t1(i8 signext %a)
; ELF64: extsb
%2 = call i32 @t2(i8 zeroext %a)
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
%3 = call i32 @t3(i16 signext %b)
; ELF64: extsh
%4 = call i32 @t4(i16 zeroext %b)
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
;; A few tests to check materialization
%5 = call i32 @t2(i8 zeroext 255)
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
%6 = call i32 @t4(i16 zeroext 65535)
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
ret void
}
define void @foo2() nounwind {
%1 = call signext i16 @t5()
%2 = call zeroext i16 @t6()
%3 = call signext i8 @t7()
%4 = call zeroext i8 @t8()
ret void
}
declare signext i16 @t5();
declare zeroext i16 @t6();
declare signext i8 @t7();
declare zeroext i8 @t8();
define i32 @t10(i32 %argc, i8** nocapture %argv) {
entry:
; ELF64: t10
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
; ELF64: li 3, 0
; ELF64: li 4, 248
; ELF64: li 5, 187
; ELF64: li 6, 28
; ELF64: li 7, 40
; ELF64: li 8, 186
; ELF64: rldicl 3, 3, 0, 56
; ELF64: rldicl 4, 4, 0, 56
; ELF64: rldicl 5, 5, 0, 56
; ELF64: rldicl 6, 6, 0, 56
; ELF64: rldicl 7, 7, 0, 56
; ELF64: rldicl 8, 8, 0, 56
ret i32 0
}
declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
define i32 @bar0(i32 %i) nounwind {
ret i32 0
}
; Function pointers are not yet implemented.
;define void @foo3() uwtable {
; %fptr = alloca i32 (i32)*, align 8
; store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
; %1 = load i32 (i32)** %fptr, align 8
; %call = call i32 %1(i32 0)
; ret void
;}
; Intrinsic calls not yet implemented, and udiv isn't one for PPC anyway.
;define i32 @LibCall(i32 %a, i32 %b) {
;entry:
; %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
; ret i32 %tmp1
;}
declare void @float_foo(float %f) ssp
define void @float_const() ssp {
entry:
; ELF64: float_const
call void @float_foo(float 0x401C666660000000)
; ELF64: addis [[REG:[0-9]+]], 2, .LCPI[[SUF:[0-9_]+]]@toc@ha
; ELF64: lfs 1, .LCPI[[SUF]]@toc@l([[REG]])
ret void
}
define void @float_reg(float %dummy, float %f) ssp {
entry:
; ELF64: float_reg
call void @float_foo(float %f)
; ELF64: fmr 1, 2
ret void
}
declare void @double_foo(double %d) ssp
define void @double_const() ssp {
entry:
; ELF64: double_const
call void @double_foo(double 0x1397723CCABD0000401C666660000000)
; ELF64: addis [[REG2:[0-9]+]], 2, .LCPI[[SUF2:[0-9_]+]]@toc@ha
; ELF64: lfd 1, .LCPI[[SUF2]]@toc@l([[REG2]])
ret void
}
define void @double_reg(double %dummy, double %d) ssp {
entry:
; ELF64: double_reg
call void @double_foo(double %d)
; ELF64: fmr 1, 2
ret void
}

@@ -4,6 +4,40 @@
@b = global i16 2, align 2
@c = global i32 4, align 4
define void @t1() nounwind uwtable ssp {
; ELF64: t1
%1 = load i8* @a, align 1
call void @foo1(i8 zeroext %1)
; ELF64: lbz
; ELF64-NOT: rldicl
; ELF64-NOT: rlwinm
ret void
}
define void @t2() nounwind uwtable ssp {
; ELF64: t2
%1 = load i16* @b, align 2
call void @foo2(i16 zeroext %1)
; ELF64: lhz
; ELF64-NOT: rldicl
; ELF64-NOT: rlwinm
ret void
}
define void @t2a() nounwind uwtable ssp {
; ELF64: t2a
%1 = load i32* @c, align 4
call void @foo3(i32 zeroext %1)
; ELF64: lwz
; ELF64-NOT: rldicl
; ELF64-NOT: rlwinm
ret void
}
declare void @foo1(i8 zeroext)
declare void @foo2(i16 zeroext)
declare void @foo3(i32 zeroext)
define i32 @t3() nounwind uwtable ssp {
; ELF64: t3
%1 = load i8* @a, align 1