ARM: simplify and extend byval handling

The main issue being fixed here is that APCS targets handling a "byval align N"
parameter with N > 4 were miscounting what objects were where on the stack,
leading to FrameLowering setting the frame pointer incorrectly and clobbering
the stack.

But byval handling had grown over many years, and had multiple layers of cruft
trying to compensate for each other and calculate padding correctly. This only
really needs to be done once, in the HandleByVal function. Everywhere else
should just do what it's told by that call.

I also stripped out unnecessary APCS/AAPCS distinctions (now that Clang emits
byvals with the correct C ABI alignment), which simplified HandleByVal.
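
To make the padding arithmetic concrete, here is a standalone sketch of the
register-side computation that HandleByVal now does in one place. This is an
illustration, not the LLVM code: plain integers 0-4 stand in for r0-r4,
splitByVal is a made-up name, and the NSAA != SP special case (where an
already-split parameter is forced entirely onto the stack) is left out.

    #include <algorithm>

    // How a byval's size and alignment pick its argument registers.
    // FirstFree is the first unallocated GPR (0..4, where 4 means none left).
    struct ByValSplit {
      unsigned FirstReg, EndReg; // half-open range of GPRs holding the head
      unsigned StackBytes;       // tail that stays on the stack
    };

    ByValSplit splitByVal(unsigned FirstFree, unsigned Size, unsigned Align) {
      Align = std::max(Align, 4u);     // stack slots are at least 4-byte aligned
      unsigned AlignInRegs = Align / 4;
      unsigned Reg = FirstFree + (4 - FirstFree) % AlignInRegs; // skip wasted regs
      if (Reg >= 4)
        return {4, 4, Size};           // nothing fits: whole object on the stack
      unsigned Excess = 4 * (4 - Reg); // bytes the remaining regs could hold
      unsigned End = std::min(Reg + Size / 4, 4u);
      return {Reg, End, Size > Excess ? Size - Excess : 0};
    }

    // splitByVal(1, 16, 8):  r1 is wasted, head in r2-r3, 8 bytes on the stack.
    // splitByVal(1, 16, 16): the waste skips r1-r3; everything stays on the stack.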

rdar://20095672

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231959 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Tim Northover
Date:   2015-03-11 18:54:22 +00:00
Parent: fb14969197
Commit: 52f83a9ab3

16 changed files with 255 additions and 298 deletions


@@ -293,7 +293,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
          "This emitPrologue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
   unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -742,8 +742,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
          "This emitEpilogue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
   unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   int NumBytes = (int)MFI->getStackSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);


@@ -1856,60 +1856,61 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 /// on the stack. Remember the next parameter register to allocate,
 /// and then confiscate the rest of the parameter registers to insure
 /// this.
-void
-ARMTargetLowering::HandleByVal(
-    CCState *State, unsigned &size, unsigned Align) const {
-  unsigned reg = State->AllocateReg(GPRArgRegs);
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+                                    unsigned Align) const {
   assert((State->getCallOrPrologue() == Prologue ||
           State->getCallOrPrologue() == Call) &&
          "unhandled ParmContext");
 
-  if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
-    if (Subtarget->isAAPCS_ABI() && Align > 4) {
-      unsigned AlignInRegs = Align / 4;
-      unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
-      for (unsigned i = 0; i < Waste; ++i)
-        reg = State->AllocateReg(GPRArgRegs);
-    }
-    if (reg != 0) {
-      unsigned excess = 4 * (ARM::R4 - reg);
+  // Byval (as with any stack) slots are always at least 4 byte aligned.
+  Align = std::max(Align, 4U);
 
-      // Special case when NSAA != SP and parameter size greater than size of
-      // all remained GPR regs. In that case we can't split parameter, we must
-      // send it to stack. We also must set NCRN to R4, so waste all
-      // remained registers.
-      const unsigned NSAAOffset = State->getNextStackOffset();
-      if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
-        while (State->AllocateReg(GPRArgRegs))
-          ;
-        return;
-      }
+  unsigned Reg = State->AllocateReg(GPRArgRegs);
+  if (!Reg)
+    return;
 
-      // First register for byval parameter is the first register that wasn't
-      // allocated before this method call, so it would be "reg".
-      // If parameter is small enough to be saved in range [reg, r4), then
-      // the end (first after last) register would be reg + param-size-in-regs,
-      // else parameter would be splitted between registers and stack,
-      // end register would be r4 in this case.
-      unsigned ByValRegBegin = reg;
-      unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
-      State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
-      // Note, first register is allocated in the beginning of function already,
-      // allocate remained amount of registers we need.
-      for (unsigned i = reg+1; i != ByValRegEnd; ++i)
-        State->AllocateReg(GPRArgRegs);
-
-      // A byval parameter that is split between registers and memory needs its
-      // size truncated here.
-      // In the case where the entire structure fits in registers, we set the
-      // size in memory to zero.
-      if (size < excess)
-        size = 0;
-      else
-        size -= excess;
-    }
-  }
+  unsigned AlignInRegs = Align / 4;
+  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+  for (unsigned i = 0; i < Waste; ++i)
+    Reg = State->AllocateReg(GPRArgRegs);
+
+  if (!Reg)
+    return;
+
+  unsigned Excess = 4 * (ARM::R4 - Reg);
+
+  // Special case when NSAA != SP and parameter size greater than size of
+  // all remained GPR regs. In that case we can't split parameter, we must
+  // send it to stack. We also must set NCRN to R4, so waste all
+  // remained registers.
+  const unsigned NSAAOffset = State->getNextStackOffset();
+  if (NSAAOffset != 0 && Size > Excess) {
+    while (State->AllocateReg(GPRArgRegs))
+      ;
+    return;
+  }
+
+  // First register for byval parameter is the first register that wasn't
+  // allocated before this method call, so it would be "reg".
+  // If parameter is small enough to be saved in range [reg, r4), then
+  // the end (first after last) register would be reg + param-size-in-regs,
+  // else parameter would be splitted between registers and stack,
+  // end register would be r4 in this case.
+  unsigned ByValRegBegin = Reg;
+  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+  // Note, first register is allocated in the beginning of function already,
+  // allocate remained amount of registers we need.
+  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+    State->AllocateReg(GPRArgRegs);
+
+  // A byval parameter that is split between registers and memory needs its
+  // size truncated here.
+  // In the case where the entire structure fits in registers, we set the
+  // size in memory to zero.
+  Size = std::max<int>(Size - Excess, 0);
 }
 
 /// MatchingStackOffset - Return true if the given stack call argument is
 /// already available in the same position (relatively) of the caller's
 /// incoming argument stack.
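
Tracing the new code on the test_align8 case from the test added below (a
16-byte "byval align 8" arriving after an i8* in r0): Reg starts at r1,
AlignInRegs is 2, so Waste = (r4 - r1) % 2 = 1 and r1 is skipped; the head
lands in r2-r3, Excess is 8, and Size is truncated to the 8 bytes that remain
on the stack.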
@@ -2818,50 +2819,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
 }
 
-void
-ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
-                                  unsigned InRegsParamRecordIdx,
-                                  unsigned ArgSize,
-                                  unsigned &ArgRegsSize,
-                                  unsigned &ArgRegsSaveSize)
-  const {
-  unsigned NumGPRs;
-  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
-    unsigned RBegin, REnd;
-    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
-    NumGPRs = REnd - RBegin;
-  } else {
-    unsigned int firstUnalloced;
-    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs);
-    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
-  }
-
-  unsigned Align = Subtarget->getFrameLowering()->getStackAlignment();
-  ArgRegsSize = NumGPRs * 4;
-
-  // If parameter is split between stack and GPRs...
-  if (NumGPRs && Align > 4 &&
-      (ArgRegsSize < ArgSize ||
-       InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
-    // Add padding for part of param recovered from GPRs. For example,
-    // if Align == 8, its last byte must be at address K*8 - 1.
-    // We need to do it, since remained (stack) part of parameter has
-    // stack alignment, and we need to "attach" "GPRs head" without gaps
-    // to it:
-    // Stack:
-    // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
-    // [ [padding] [GPRs head] ] [ Tail passed via stack ....
-    //
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    unsigned Padding =
-        OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
-    ArgRegsSaveSize = ArgRegsSize + Padding;
-  } else
-    // We don't need to extend regs save size for byval parameters if they
-    // are passed via GPRs only.
-    ArgRegsSaveSize = ArgRegsSize;
-}
-
 // The remaining GPRs hold either the beginning of variable-argument
 // data, or the beginning of an aggregate passed by value (usually
 // byval). Either way, we allocate stack slots adjacent to the data
@@ -2875,13 +2832,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
                                   SDLoc dl, SDValue &Chain,
                                   const Value *OrigArg,
                                   unsigned InRegsParamRecordIdx,
-                                  unsigned OffsetFromOrigArg,
-                                  unsigned ArgOffset,
-                                  unsigned ArgSize,
-                                  bool ForceMutable,
-                                  unsigned ByValStoreOffset,
-                                  unsigned TotalArgRegsSaveSize) const {
+                                  int ArgOffset,
+                                  unsigned ArgSize) const {
   // Currently, two use-cases possible:
   // Case #1. Non-var-args function, and we meet first byval parameter.
   //          Setup first unallocated register as first byval register;
@@ -2896,82 +2848,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+  unsigned RBegin, REnd;
   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
-    firstRegToSaveIndex = RBegin - ARM::R0;
-    lastRegToSaveIndex = REnd - ARM::R0;
   } else {
-    firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs);
-    lastRegToSaveIndex = 4;
+    unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+    RBegin = RBeginIdx == 4 ? ARM::R4 : GPRArgRegs[RBeginIdx];
+    REnd = ARM::R4;
   }
 
-  unsigned ArgRegsSize, ArgRegsSaveSize;
-  computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
-                 ArgRegsSize, ArgRegsSaveSize);
+  if (REnd != RBegin)
+    ArgOffset = -4 * (ARM::R4 - RBegin);
 
   // Store any by-val regs to their spots on the stack so that they may be
   // loaded by deferencing the result of formal parameter pointer or va_next.
-  // Note: once stack area for byval/varargs registers
-  // was initialized, it can't be initialized again.
-  if (ArgRegsSaveSize) {
-    unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
+  int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+  SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
 
-    if (Padding) {
-      assert(AFI->getStoredByValParamsPadding() == 0 &&
-             "The only parameter may be padded.");
-      AFI->setStoredByValParamsPadding(Padding);
-    }
+  SmallVector<SDValue, 4> MemOps;
+  const TargetRegisterClass *RC =
+      AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
 
-    int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
-                                            Padding +
-                                              ByValStoreOffset -
-                                              (int64_t)TotalArgRegsSaveSize,
-                                            false);
-    SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
-    if (Padding) {
-      MFI->CreateFixedObject(Padding,
-                             ArgOffset + ByValStoreOffset -
-                               (int64_t)ArgRegsSaveSize,
-                             false);
-    }
-
-    SmallVector<SDValue, 4> MemOps;
-    for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
-         ++firstRegToSaveIndex, ++i) {
-      const TargetRegisterClass *RC;
-      if (AFI->isThumb1OnlyFunction())
-        RC = &ARM::tGPRRegClass;
-      else
-        RC = &ARM::GPRRegClass;
-
-      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
-      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-      SDValue Store =
+  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+    unsigned VReg = MF.addLiveIn(Reg, RC);
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+    SDValue Store =
         DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                     MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
-                     false, false, 0);
-      MemOps.push_back(Store);
-      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
-                        DAG.getConstant(4, getPointerTy()));
-    }
-
-    AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
-
-    if (!MemOps.empty())
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
-    return FrameIndex;
-  } else {
-    if (ArgSize == 0) {
-      // We cannot allocate a zero-byte object for the first variadic argument,
-      // so just make up a size.
-      ArgSize = 4;
-    }
-    // This will point to the next argument passed via stack.
-    return MFI->CreateFixedObject(
-      ArgSize, ArgOffset, !ForceMutable);
+                     MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+    MemOps.push_back(Store);
+    FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+                      DAG.getConstant(4, getPointerTy()));
   }
+
+  if (!MemOps.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+  return FrameIndex;
 }
 
 // Setup stack frame, the va_list pointer will start from.
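
The offsets the new StoreByValRegs relies on are easy to state outside the
SelectionDAG machinery. In the same spirit as the earlier sketch (made-up
helper names, plain integers for register numbers): every GPR from the lowest
spilled argument register up to r4 is stored as one contiguous block ending
at the CFA (the stack pointer at function entry), so a byval whose head
starts in RBegin gets a fixed object at a small negative CFA offset, and its
register head abuts its stack tail with no gap.

    unsigned totalArgRegsSaveSize(unsigned ArgRegBegin) { // e.g. r1 -> 12 bytes
      return 4 * (4 - ArgRegBegin);
    }

    int byvalHeadOffset(unsigned RBegin) { // e.g. r2 -> -8: r2 at CFA-8, r3 at CFA-4
      return -4 * (4 - RBegin);
    }

This is what lets the per-parameter padding objects and the running
ByValStoreOffset disappear.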
@@ -2989,11 +2898,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   // the result of va_next.
   // If there is no regs to be stored, just point address after last
   // argument passed via stack.
-  int FrameIndex =
-    StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
-                   CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
-                   0, TotalArgRegsSaveSize);
+  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+                                  CCInfo.getInRegsParamsCount(),
+                                  CCInfo.getNextStackOffset(), 4);
   AFI->setVarArgsFrameIndex(FrameIndex);
 }
@@ -3019,7 +2926,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                   isVarArg));
 
   SmallVector<SDValue, 16> ArgValues;
-  int lastInsIndex = -1;
   SDValue ArgValue;
   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
   unsigned CurArgIdx = 0;
@@ -3029,50 +2935,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   // We also increase this value in case of varargs function.
   AFI->setArgRegsSaveSize(0);
 
-  unsigned ByValStoreOffset = 0;
-  unsigned TotalArgRegsSaveSize = 0;
-  unsigned ArgRegsSaveSizeMaxAlign = 4;
+  // Calculate the amount of stack space that we need to allocate to store
+  // byval and variadic arguments that are passed in registers.
+  // We need to know this before we allocate the first byval or variadic
+  // argument, as they will be allocated a stack slot below the CFA (Canonical
+  // Frame Address, the stack pointer at entry to the function).
+  unsigned ArgRegBegin = ARM::R4;
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    CCValAssign &VA = ArgLocs[i];
-    if (VA.isMemLoc()) {
-      int index = VA.getValNo();
-      if (index != lastInsIndex) {
-        ISD::ArgFlagsTy Flags = Ins[index].Flags;
-        if (Flags.isByVal()) {
-          unsigned ExtraArgRegsSize;
-          unsigned ExtraArgRegsSaveSize;
-          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
-                         Flags.getByValSize(),
-                         ExtraArgRegsSize, ExtraArgRegsSaveSize);
-          if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
-            break;
-          TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
-          if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
-            ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
-          CCInfo.nextInRegsParam();
-        }
-        lastInsIndex = index;
-      }
-    }
+    CCValAssign &VA = ArgLocs[i];
+    unsigned Index = VA.getValNo();
+    ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+    if (!Flags.isByVal())
+      continue;
+
+    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+    unsigned RBegin, REnd;
+    CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+    ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+    CCInfo.nextInRegsParam();
   }
   CCInfo.rewindByValRegsInfo();
-  lastInsIndex = -1;
+
+  int lastInsIndex = -1;
   if (isVarArg && MFI->hasVAStart()) {
-    unsigned ExtraArgRegsSize;
-    unsigned ExtraArgRegsSaveSize;
-    computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
-                   ExtraArgRegsSize, ExtraArgRegsSaveSize);
-    TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+    unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+    if (RegIdx != array_lengthof(GPRArgRegs))
+      ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
   }
 
-  // If the arg regs save area contains N-byte aligned values, the
-  // bottom of it must be at least N-byte aligned.
-  TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
-  TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
-
+  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
   AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
+
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -3177,18 +3073,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                  "Byval arguments cannot be implicit");
           unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
 
-          ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
-          int FrameIndex = StoreByValRegs(
-              CCInfo, DAG, dl, Chain, CurOrigArg,
-              CurByValIndex,
-              Ins[VA.getValNo()].PartOffset,
-              VA.getLocMemOffset(),
-              Flags.getByValSize(),
-              true /*force mutable frames*/,
-              ByValStoreOffset,
-              TotalArgRegsSaveSize);
-          ByValStoreOffset += Flags.getByValSize();
-          ByValStoreOffset = std::min(ByValStoreOffset, 16U);
+          int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
+                                          CurByValIndex, VA.getLocMemOffset(),
+                                          Flags.getByValSize());
           InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
           CCInfo.nextInRegsParam();
         } else {
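
With the pre-scan above, the save area is a single block of
4 * (r4 - ArgRegBegin) bytes that covers byval heads and the varargs
registers alike, so the RoundUpToAlignment and 16-byte clamp that
computeRegArea applied are no longer needed. For example, a lone byval head
starting at r1 now reserves exactly 12 bytes, the "sub sp, sp, #12" seen
throughout the updated tests below.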


@@ -528,12 +528,8 @@ namespace llvm {
                             SDLoc dl, SDValue &Chain,
                             const Value *OrigArg,
                             unsigned InRegsParamRecordIdx,
-                            unsigned OffsetFromOrigArg,
-                            unsigned ArgOffset,
-                            unsigned ArgSize,
-                            bool ForceMutable,
-                            unsigned ByValStoreOffset,
-                            unsigned TotalArgRegsSaveSize) const;
+                            int ArgOffset,
+                            unsigned ArgSize) const;
 
     void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                               SDLoc dl, SDValue &Chain,
@@ -541,12 +537,6 @@ namespace llvm {
                               unsigned TotalArgRegsSaveSize,
                               bool ForceMutable = false) const;
 
-    void computeRegArea(CCState &CCInfo, MachineFunction &MF,
-                        unsigned InRegsParamRecordIdx,
-                        unsigned ArgSize,
-                        unsigned &ArgRegsSize,
-                        unsigned &ArgRegsSaveSize) const;
-
     SDValue
       LowerCall(TargetLowering::CallLoweringInfo &CLI,
                 SmallVectorImpl<SDValue> &InVals) const override;


@@ -149,11 +149,7 @@ public:
   unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
   void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
 
-  unsigned getArgRegsSaveSize(unsigned Align = 0) const {
-    if (!Align)
-      return ArgRegsSaveSize;
-    return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
-  }
+  unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
   void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
 
   unsigned getReturnRegsCount() const { return ReturnRegsCount; }


@@ -94,8 +94,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   const Thumb1InstrInfo &TII =
       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
   unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   assert(NumBytes >= ArgRegsSaveSize &&
          "ArgRegsSaveSize is included in NumBytes");
@@ -333,8 +332,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
   const Thumb1InstrInfo &TII =
       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
   unsigned Align = STI.getFrameLowering()->getStackAlignment();
-  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   int NumBytes = (int)MFI->getStackSize();
   assert((unsigned)NumBytes >= ArgRegsSaveSize &&
          "ArgRegsSaveSize is included in NumBytes");


@@ -10,7 +10,9 @@ declare void @llvm.va_end(i8*) nounwind
 ; CHECK-LABEL: test_byval_8_bytes_alignment:
 define void @test_byval_8_bytes_alignment(i32 %i, ...) {
 entry:
-; CHECK: stm r0, {r1, r2, r3}
+; CHECK: sub sp, sp, #12
+; CHECK: sub sp, sp, #4
+; CHECK: stmib sp, {r1, r2, r3}
   %g = alloca i8*
   %g1 = bitcast i8** %g to i8*
   call void @llvm.va_start(i8* %g1)


@@ -10,8 +10,9 @@ define void @t(i32 %a, %struct.s* byval %s) nounwind {
 entry:
 
 ; Here we need to only check proper start address of restored %s argument.
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
 ; CHECK: push {r11, lr}
+; CHECK: sub sp, sp, #4
 ; CHECK: add r0, sp, #12
 ; CHECK: stm r0, {r1, r2, r3}
 ; CHECK: add r0, sp, #12


@@ -2,26 +2,26 @@
 ;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
 
 ;CHECK-LABEL: foo:
 ;CHECK: sub sp, sp, #8
 ;CHECK: push {r11, lr}
 ;CHECK: str r0, [sp, #12]
 ;CHECK: add r0, sp, #12
 ;CHECK: bl fooUseParam
 ;CHECK: pop {r11, lr}
 ;CHECK: add sp, sp, #8
 ;CHECK: mov pc, lr
 
 ;CHECK-LABEL: foo2:
-;CHECK: sub sp, sp, #8
+;CHECK: sub sp, sp, #16
 ;CHECK: push {r11, lr}
 ;CHECK: str r0, [sp, #8]
 ;CHECK: add r0, sp, #8
-;CHECK: str r2, [sp, #12]
+;CHECK: str r2, [sp, #16]
 ;CHECK: bl fooUseParam
-;CHECK: add r0, sp, #12
+;CHECK: add r0, sp, #16
 ;CHECK: bl fooUseParam
 ;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #8
+;CHECK: add sp, sp, #16
 ;CHECK: mov pc, lr
 
 ;CHECK-LABEL: doFoo:


@@ -21,11 +21,12 @@ define void @foo(double %vfp0, ; --> D0, NSAA=SP
                  i32 %p2,      ; --> R3, NSAA=SP+8
                  i32 %p3) #0 { ; --> SP+4, NSAA=SP+12
 entry:
-;CHECK: sub sp, #8
+;CHECK: sub sp, #12
 ;CHECK: push.w {r11, lr}
-;CHECK: add r0, sp, #8
-;CHECK: str r2, [sp, #12]
-;CHECK: str r1, [sp, #8]
+;CHECK: sub sp, #4
+;CHECK: add r0, sp, #12
+;CHECK: str r2, [sp, #16]
+;CHECK: str r1, [sp, #12]
 ;CHECK: bl fooUseStruct
   call void @fooUseStruct(%st_t* %p1)
   ret void


@@ -4,7 +4,7 @@
 %struct.S227 = type { [49 x i32], i32 }
 
 define void @check227(
-  i32 %b,
+                      i32 %b,
   %struct.S227* byval nocapture %arg0,
   %struct.S227* %arg1) {
 ; b --> R0
@@ -13,14 +13,16 @@ define void @check227(
 entry:
-;CHECK: sub sp, sp, #16
+;CHECK: sub sp, sp, #12
 ;CHECK: push {r11, lr}
+;CHECK: sub sp, sp, #4
 ;CHECK: add r0, sp, #12
 ;CHECK: stm r0, {r1, r2, r3}
 ;CHECK: ldr r0, [sp, #212]
 ;CHECK: bl useInt
+;CHECK: add sp, sp, #4
 ;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #16
+;CHECK: add sp, sp, #12
 
   %0 = ptrtoint %struct.S227* %arg1 to i32
   tail call void @useInt(i32 %0)


@@ -13,15 +13,16 @@ declare void @usePtr(%struct8bytes8align*)
 ; c -> sp+0..sp+7
 define void @foo1(i32 %a, %struct12bytes* byval %b, i64 %c) {
 ; CHECK-LABEL: foo1
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
 ; CHECK: push {r11, lr}
+; CHECK: sub sp, sp, #4
 ; CHECK: add [[SCRATCH:r[0-9]+]], sp, #12
 ; CHECK: stm [[SCRATCH]], {r1, r2, r3}
 ; CHECK: ldr r0, [sp, #24]
 ; CHECK: ldr r1, [sp, #28]
 ; CHECK: bl useLong
 ; CHECK: pop {r11, lr}
-; CHECK: add sp, sp, #16
+; CHECK: add sp, sp, #12
 
   call void @useLong(i64 %c)
   ret void


@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=thumbv7-apple-ios8.0 %s -o - | FileCheck %s
+
+; This checks that alignments greater than 4 are respected by APCS
+; targets. Mostly here to make sure *some* correct code is created after some
+; simplifying refactoring; at the time of writing there were no actual APCS
+; users of byval alignments > 4, so no real calls for ABI stability.
+
+; "byval align 16" can't fit in any regs with an i8* taking up r0.
+define i32 @test_align16(i8*, [4 x i32]* byval align 16 %b) {
+; CHECK-LABEL: test_align16:
+; CHECK-NOT: sub sp
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: ldr r0, [r7, #8]
+  call void @bar()
+  %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+  %val = load i32, i32* %valptr
+  ret i32 %val
+}
+
+; byval align 8 can, but we used to incorrectly set r7 here (miscalculating the
+; space taken up by arg regs).
+define i32 @test_align8(i8*, [4 x i32]* byval align 8 %b) {
+; CHECK-LABEL: test_align8:
+; CHECK: sub sp, #8
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK-DAG: str r2, [r7, #8]
+; CHECK-DAG: str r3, [r7, #12]
+
+; CHECK: ldr r0, [r7, #8]
+  call void @bar()
+  %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+  %val = load i32, i32* %valptr
+  ret i32 %val
+}
+
+; "byval align 32" can't fit in regs no matter what: it would be misaligned
+; unless the incoming stack was deliberately misaligned.
+define i32 @test_align32(i8*, [4 x i32]* byval align 32 %b) {
+; CHECK-LABEL: test_align32:
+; CHECK-NOT: sub sp
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: ldr r0, [r7, #8]
+  call void @bar()
+  %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+  %val = load i32, i32* %valptr
+  ret i32 %val
+}
+
+; When passing an object "byval align N", the stack must be at least N-aligned.
define void @test_call_align16() {
+; CHECK-LABEL: test_call_align16:
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: mov [[TMP:r[0-9]+]], sp
+; CHECK: bfc [[TMP]], #0, #4
+; CHECK: mov sp, [[TMP]]
+
+; While we're here, make sure the caller also puts it at sp
+; CHECK: mov r[[BASE:[0-9]+]], sp
+; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+  call i32 @test_align16(i8* null, [4 x i32]* byval align 16 @var)
+  ret void
+}
+
+@var = global [4 x i32] zeroinitializer
+declare void @bar()
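
These cases line up with the sketch given after the commit message: with r0
taken, splitByVal(1, 16, 16) skips r1-r3 entirely, so nothing is passed in
registers and no "sub sp" is needed, while splitByVal(1, 16, 8) wastes only
r1 and leaves 8 bytes on the stack, which is why test_align8 expects the r2
and r3 stores.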


@@ -62,51 +62,51 @@
 ; CHECK-FP-LABEL: sum
 ; CHECK-FP: .cfi_startproc
-; CHECK-FP: sub sp, sp, #16
-; CHECK-FP: .cfi_def_cfa_offset 16
+; CHECK-FP: sub sp, sp, #12
+; CHECK-FP: .cfi_def_cfa_offset 12
 ; CHECK-FP: push {r4, lr}
+; CHECK-FP: .cfi_def_cfa_offset 20
+; CHECK-FP: .cfi_offset lr, -16
+; CHECK-FP: .cfi_offset r4, -20
+; CHECK-FP: sub sp, #4
 ; CHECK-FP: .cfi_def_cfa_offset 24
-; CHECK-FP: .cfi_offset lr, -20
-; CHECK-FP: .cfi_offset r4, -24
 ; CHECK-FP: sub sp, sp, #8
 ; CHECK-FP: .cfi_def_cfa_offset 32
 
 ; CHECK-FP-ELIM-LABEL: sum
 ; CHECK-FP-ELIM: .cfi_startproc
-; CHECK-FP-ELIM: sub sp, sp, #16
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 16
+; CHECK-FP-ELIM: sub sp, sp, #12
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 12
 ; CHECK-FP-ELIM: push {r4, r10, r11, lr}
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 32
-; CHECK-FP-ELIM: .cfi_offset lr, -20
-; CHECK-FP-ELIM: .cfi_offset r11, -24
-; CHECK-FP-ELIM: .cfi_offset r10, -28
-; CHECK-FP-ELIM: .cfi_offset r4, -32
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-FP-ELIM: .cfi_offset lr, -16
+; CHECK-FP-ELIM: .cfi_offset r11, -20
+; CHECK-FP-ELIM: .cfi_offset r10, -24
+; CHECK-FP-ELIM: .cfi_offset r4, -28
 ; CHECK-FP-ELIM: add r11, sp, #8
-; CHECK-FP-ELIM: .cfi_def_cfa r11, 24
+; CHECK-FP-ELIM: .cfi_def_cfa r11, 20
 
 ; CHECK-THUMB-FP-LABEL: sum
 ; CHECK-THUMB-FP: .cfi_startproc
-; CHECK-THUMB-FP: sub sp, #16
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP: sub sp, #12
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 12
 ; CHECK-THUMB-FP: push {r4, lr}
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 20
+; CHECK-THUMB-FP: .cfi_offset lr, -16
+; CHECK-THUMB-FP: .cfi_offset r4, -20
+; CHECK-THUMB-FP: sub sp, #4
 ; CHECK-THUMB-FP: .cfi_def_cfa_offset 24
-; CHECK-THUMB-FP: .cfi_offset lr, -20
-; CHECK-THUMB-FP: .cfi_offset r4, -24
 ; CHECK-THUMB-FP: sub sp, #8
 ; CHECK-THUMB-FP: .cfi_def_cfa_offset 32
 
 ; CHECK-THUMB-FP-ELIM-LABEL: sum
 ; CHECK-THUMB-FP-ELIM: .cfi_startproc
-; CHECK-THUMB-FP-ELIM: sub sp, #16
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP-ELIM: sub sp, #12
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 12
 ; CHECK-THUMB-FP-ELIM: push {r4, r6, r7, lr}
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32
-; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20
-; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24
-; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -28
-; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -16
+; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -20
+; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -24
+; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -28
 ; CHECK-THUMB-FP-ELIM: add r7, sp, #8
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 20
 
 define i32 @sum(i32 %count, ...) {
 entry:
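
The updated CFI numbers are the new layout read back: reserving 12 bytes for
the r1-r3 varargs save area puts the CFA at offset 12; push {r4, lr} moves it
to 20, with lr at CFA-16 and r4 at CFA-20; the extra 4-byte alignment sub
then gives offset 24. Under the old rounded-up 16-byte reservation, each of
these offsets was 4 bytes larger.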


@@ -161,7 +161,7 @@ entry:
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
   %8 = load i32, i32* %7, align 1
-  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -308,7 +308,7 @@ entry:
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
   %8 = load i32, i32* %7, align 1
-  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -443,7 +443,7 @@ entry:
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
   %8 = load i32, i32* %7, align 1
-  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -482,7 +482,7 @@ entry:
   %coerce.dive5 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d2, i32 0, i32 0
   %5 = bitcast [2 x i16]* %coerce.dive5 to i32*
   %6 = load i32, i32* %5, align 1
-  call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
+  call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 4 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
   ret void
 }


@@ -21,10 +21,10 @@ define void @varargs_func(i32 %arg1, ...) {
 ; CHECK-LABEL: varargs_func:
 ; Reserve space for the varargs save area. This currently reserves
 ; more than enough (16 bytes rather than the 12 bytes needed).
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
 ; CHECK: push {r11, lr}
 ; Align the stack pointer to a multiple of 16.
-; CHECK: sub sp, sp, #8
+; CHECK: sub sp, sp, #12
 ; Calculate the address of the varargs save area and save varargs
 ; arguments into it.
 ; CHECK-NEXT: add r0, sp, #20


@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -mtriple=armv7-linux-gnueabihf | FileCheck %s -check-prefix=EABI
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI
 
 define i32 @f(i32 %a, ...) {
@@ -11,13 +11,17 @@ entry:
   %tmp1 = load i32, i32* %tmp   ; <i32> [#uses=1]
   store i32 %tmp1, i32* %retval
   call void @llvm.va_start(i8* null)
+  call void asm sideeffect "", "~{d8}"()
   br label %return
 
 return:   ; preds = %entry
   %retval2 = load i32, i32* %retval   ; <i32> [#uses=1]
   ret i32 %retval2
-; EABI: add sp, sp, #12
+; EABI: add sp, sp, #16
+; EABI: vpop {d8}
+; EABI: add sp, sp, #4
+; EABI: add sp, sp, #12
 ; OABI: add sp, sp, #12
 }