Implement inalloca codegen for x86 with the new inalloca design

Calls with inalloca are lowered by skipping all stores for arguments
passed in memory and the initial stack adjustment to allocate argument
memory.

Now the frontend is responsible for the memory layout, and the backend
doesn't have to do any work.  As a result these changes are pretty
minimal.

Reviewers: echristo

Differential Revision: http://llvm-reviews.chandlerc.com/D2637

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200596 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Reid Kleckner 2014-01-31 23:50:57 +00:00
parent cb6684b63b
commit 8a24e83550
13 changed files with 251 additions and 17 deletions

View File

@ -792,7 +792,10 @@ Currently, only the following parameter attributes are defined:
An argument allocation may be used by a call at most once because
the call may deallocate it. The ``inalloca`` attribute cannot be
used in conjunction with other attributes that affect argument
storage, like ``inreg``, ``nest``, ``sret``, or ``byval``.
storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The
``inalloca`` attribute also disables LLVM's implicit lowering of
large aggregate return values, which means that frontend authors
must lower them with ``sret`` pointers.
When the call site is reached, the argument allocation must have
been the most recent stack allocation that is still live, or the

View File

@ -268,9 +268,10 @@ public:
paramHasAttr(ArgNo + 1, Attribute::InAlloca);
}
/// @brief Determine if there are any inalloca arguments.
/// @brief Determine if there are is an inalloca argument. Only the last
/// argument can have the inalloca attribute.
bool hasInAllocaArgument() const {
return getAttributes().hasAttrSomewhere(Attribute::InAlloca);
return paramHasAttr(arg_size(), Attribute::InAlloca);
}
bool doesNotAccessMemory(unsigned ArgNo) const {

View File

@ -42,6 +42,8 @@ namespace ISD {
static const uint64_t ByValAlignOffs = 7;
static const uint64_t Split = 1ULL<<11;
static const uint64_t SplitOffs = 11;
static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca
static const uint64_t InAllocaOffs = 12;
static const uint64_t OrigAlign = 0x1FULL<<27;
static const uint64_t OrigAlignOffs = 27;
static const uint64_t ByValSize = 0xffffffffULL<<32; ///< Struct size
@ -68,6 +70,9 @@ namespace ISD {
bool isByVal() const { return Flags & ByVal; }
void setByVal() { Flags |= One << ByValOffs; }
bool isInAlloca() const { return Flags & InAlloca; }
void setInAlloca() { Flags |= One << InAllocaOffs; }
bool isNest() const { return Flags & Nest; }
void setNest() { Flags |= One << NestOffs; }

View File

@ -609,8 +609,9 @@ public:
return getValueType(Ty, AllowUnknown).getSimpleVT();
}
/// Return the desired alignment for ByVal aggregate function arguments in the
/// caller parameter area. This is the actual alignment, not its logarithm.
/// Return the desired alignment for ByVal or InAlloca aggregate function
/// arguments in the caller parameter area. This is the actual alignment, not
/// its logarithm.
virtual unsigned getByValTypeAlignment(Type *Ty) const;
/// Return the type of registers that this ValueType will eventually require.
@ -1965,12 +1966,13 @@ public:
bool isSRet : 1;
bool isNest : 1;
bool isByVal : 1;
bool isInAlloca : 1;
bool isReturned : 1;
uint16_t Alignment;
ArgListEntry() : isSExt(false), isZExt(false), isInReg(false),
isSRet(false), isNest(false), isByVal(false), isReturned(false),
Alignment(0) { }
isSRet(false), isNest(false), isByVal(false), isInAlloca(false),
isReturned(false), Alignment(0) { }
void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx);
};

View File

@ -5434,6 +5434,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
assert(!CS.hasInAllocaArgument() &&
"sret demotion is incompatible with inalloca");
uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
FTy->getReturnType());
unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(
@ -7142,8 +7144,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setInReg();
if (Args[i].isSRet)
Flags.setSRet();
if (Args[i].isByVal) {
if (Args[i].isByVal)
Flags.setByVal();
if (Args[i].isInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Args[i].isByVal || Args[i].isInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
@ -7362,8 +7374,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setInReg();
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
Flags.setByVal();
if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(TD->getTypeAllocSize(ElementTy));

View File

@ -75,6 +75,7 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
Alignment = CS->getParamAlignment(AttrIdx);
}

View File

@ -65,8 +65,8 @@ static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI) {
Type *Ty = AI->getType();
// 'Dereference' type in case of byval parameter attribute
if (AI->hasByValAttr())
// 'Dereference' type in case of byval or inalloca parameter attribute.
if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
// Size should be aligned to DWORD boundary
ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;

View File

@ -1910,6 +1910,10 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (isVarArg && isWin64)
return false;
// Don't know about inalloca yet.
if (CS.hasInAllocaArgument())
return false;
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
TM.Options.GuaranteedTailCallOpt))

View File

@ -2584,9 +2584,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
X86Info->setTCReturnAddrDelta(FPDiff);
}
unsigned NumBytesToPush = NumBytes;
unsigned NumBytesToPop = NumBytes;
// If we have an inalloca argument, all stack space has already been allocated
// for us and be right at the top of the stack. We don't support multiple
// arguments passed in memory when using inalloca.
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
NumBytesToPush = 0;
assert(ArgLocs.back().getLocMemOffset() == 0 &&
"an inalloca argument must be the only memory argument");
}
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
dl);
Chain = DAG.getCALLSEQ_START(
Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
@ -2603,10 +2615,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (Flags.isInAlloca())
continue;
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
@ -2873,8 +2889,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag, dl);
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, true),
DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
@ -2931,7 +2948,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(NumBytesToPop, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPop,
true),
InFlag, dl);

View File

@ -0,0 +1,34 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
%Foo = type { i32, i32 }
%frame = type { %Foo, i32, %Foo }
declare void @f(%frame* inalloca %a)
declare void @Foo_ctor(%Foo* %this)
define void @g() {
entry:
%args = alloca %frame, inalloca
%c = getelementptr %frame* %args, i32 0, i32 2
; CHECK: movl $20, %eax
; CHECK: calll __chkstk
; CHECK: movl %esp,
call void @Foo_ctor(%Foo* %c)
; CHECK: leal 12(%{{.*}}),
; CHECK: subl $4, %esp
; CHECK: calll _Foo_ctor
; CHECK: addl $4, %esp
%b = getelementptr %frame* %args, i32 0, i32 1
store i32 42, i32* %b
; CHECK: movl $42,
%a = getelementptr %frame* %args, i32 0, i32 0
call void @Foo_ctor(%Foo* %a)
; CHECK: subl $4, %esp
; CHECK: calll _Foo_ctor
; CHECK: addl $4, %esp
call void @f(%frame* inalloca %args)
; CHECK: calll _f
ret void
}

View File

@ -0,0 +1,54 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
%Iter = type { i32, i32, i32 }
%frame.reverse = type { %Iter, %Iter }
declare void @llvm.stackrestore(i8*)
declare i8* @llvm.stacksave()
declare void @begin(%Iter* sret)
declare void @plus(%Iter* sret, %Iter*, i32)
declare void @reverse(%frame.reverse* inalloca align 4)
define i32 @main() {
%temp.lvalue = alloca %Iter
br label %blah
blah:
%inalloca.save = call i8* @llvm.stacksave()
%rev_args = alloca %frame.reverse, inalloca, align 4
%beg = getelementptr %frame.reverse* %rev_args, i32 0, i32 0
%end = getelementptr %frame.reverse* %rev_args, i32 0, i32 1
; CHECK: calll __chkstk
; CHECK: movl %[[beg:[^,]*]], %esp
; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
call void @begin(%Iter* sret %temp.lvalue)
; CHECK: calll _begin
invoke void @plus(%Iter* sret %end, %Iter* %temp.lvalue, i32 4)
to label %invoke.cont unwind label %lpad
; Uses end as sret param.
; CHECK: movl %[[end]], (%esp)
; CHECK: calll _plus
invoke.cont:
call void @begin(%Iter* sret %beg)
; CHECK: movl %[[beg]],
; CHECK: calll _begin
invoke void @reverse(%frame.reverse* inalloca align 4 %rev_args)
to label %invoke.cont5 unwind label %lpad
invoke.cont5: ; preds = %invoke.cont
call void @llvm.stackrestore(i8* %inalloca.save)
ret i32 0
lpad: ; preds = %invoke.cont, %entry
%lp = landingpad { i8*, i32 } personality i8* null
cleanup
unreachable
}

View File

@ -0,0 +1,26 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
%Foo = type { i32, i32 }
declare x86_stdcallcc void @f(%Foo* inalloca %a)
declare x86_stdcallcc void @i(i32 %a)
define void @g() {
%b = alloca %Foo, inalloca
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
; CHECK: movl %[[REG:[^,]*]], %esp
%f1 = getelementptr %Foo* %b, i32 0, i32 0
%f2 = getelementptr %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK: movl $13, (%[[REG]])
; CHECK: movl $42, 4(%[[REG]])
call x86_stdcallcc void @f(%Foo* inalloca %b)
; CHECK: calll _f@8
; CHECK-NOT: %esp
; CHECK: subl $4, %esp
; CHECK: calll _i@4
call x86_stdcallcc void @i(i32 0)
ret void
}

View File

@ -0,0 +1,65 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
%Foo = type { i32, i32 }
declare void @f(%Foo* inalloca %b)
define void @a() {
; CHECK-LABEL: _a:
entry:
%b = alloca %Foo, inalloca
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
; CHECK: movl %[[REG:[^,]*]], %esp
%f1 = getelementptr %Foo* %b, i32 0, i32 0
%f2 = getelementptr %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK: movl $13, (%[[REG]])
; CHECK: movl $42, 4(%[[REG]])
call void @f(%Foo* inalloca %b)
; CHECK: calll _f
ret void
}
declare void @inreg_with_inalloca(i32 inreg %a, %Foo* inalloca %b)
define void @b() {
; CHECK-LABEL: _b:
entry:
%b = alloca %Foo, inalloca
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
; CHECK: movl %[[REG:[^,]*]], %esp
%f1 = getelementptr %Foo* %b, i32 0, i32 0
%f2 = getelementptr %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK: movl $13, (%[[REG]])
; CHECK: movl $42, 4(%[[REG]])
call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
; CHECK: movl $1, %eax
; CHECK: calll _inreg_with_inalloca
ret void
}
declare x86_thiscallcc void @thiscall_with_inalloca(i8* %a, %Foo* inalloca %b)
define void @c() {
; CHECK-LABEL: _c:
entry:
%b = alloca %Foo, inalloca
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
; CHECK: movl %[[REG:[^,]*]], %esp
%f1 = getelementptr %Foo* %b, i32 0, i32 0
%f2 = getelementptr %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
; CHECK: movl $13, (%[[REG]])
; CHECK: movl $42, 4(%[[REG]])
call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
; CHECK: xorl %ecx, %ecx
; CHECK: calll _thiscall_with_inalloca
ret void
}