diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 5a5bd574aca..830953b2b0c 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -293,7 +293,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -742,8 +742,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 53765846d6f..06dddb6f859 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1856,60 +1856,61 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// on the stack. Remember the next parameter register to allocate, /// and then confiscate the rest of the parameter registers to insure /// this. 
-void -ARMTargetLowering::HandleByVal( - CCState *State, unsigned &size, unsigned Align) const { - unsigned reg = State->AllocateReg(GPRArgRegs); +void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, + unsigned Align) const { assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { - if (Subtarget->isAAPCS_ABI() && Align > 4) { - unsigned AlignInRegs = Align / 4; - unsigned Waste = (ARM::R4 - reg) % AlignInRegs; - for (unsigned i = 0; i < Waste; ++i) - reg = State->AllocateReg(GPRArgRegs); - } - if (reg != 0) { - unsigned excess = 4 * (ARM::R4 - reg); + // Byval (as with any stack) slots are always at least 4 byte aligned. + Align = std::max(Align, 4U); - // Special case when NSAA != SP and parameter size greater than size of - // all remained GPR regs. In that case we can't split parameter, we must - // send it to stack. We also must set NCRN to R4, so waste all - // remained registers. - const unsigned NSAAOffset = State->getNextStackOffset(); - if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { - while (State->AllocateReg(GPRArgRegs)) - ; - return; - } + unsigned Reg = State->AllocateReg(GPRArgRegs); + if (!Reg) + return; - // First register for byval parameter is the first register that wasn't - // allocated before this method call, so it would be "reg". - // If parameter is small enough to be saved in range [reg, r4), then - // the end (first after last) register would be reg + param-size-in-regs, - // else parameter would be splitted between registers and stack, - // end register would be r4 in this case. - unsigned ByValRegBegin = reg; - unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; - State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); - // Note, first register is allocated in the beginning of function already, - // allocate remained amount of registers we need. 
- for (unsigned i = reg+1; i != ByValRegEnd; ++i) - State->AllocateReg(GPRArgRegs); - // A byval parameter that is split between registers and memory needs its - // size truncated here. - // In the case where the entire structure fits in registers, we set the - // size in memory to zero. - if (size < excess) - size = 0; - else - size -= excess; - } + unsigned AlignInRegs = Align / 4; + unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; + for (unsigned i = 0; i < Waste; ++i) + Reg = State->AllocateReg(GPRArgRegs); + + if (!Reg) + return; + + unsigned Excess = 4 * (ARM::R4 - Reg); + + // Special case when NSAA != SP and parameter size greater than size of + // all remained GPR regs. In that case we can't split parameter, we must + // send it to stack. We also must set NCRN to R4, so waste all + // remained registers. + const unsigned NSAAOffset = State->getNextStackOffset(); + if (NSAAOffset != 0 && Size > Excess) { + while (State->AllocateReg(GPRArgRegs)) + ; + return; } + + // First register for byval parameter is the first register that wasn't + // allocated before this method call, so it would be "reg". + // If parameter is small enough to be saved in range [reg, r4), then + // the end (first after last) register would be reg + param-size-in-regs, + // else parameter would be splitted between registers and stack, + // end register would be r4 in this case. + unsigned ByValRegBegin = Reg; + unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); + State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); + // Note, first register is allocated in the beginning of function already, + // allocate remained amount of registers we need. + for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) + State->AllocateReg(GPRArgRegs); + // A byval parameter that is split between registers and memory needs its + // size truncated here. + // In the case where the entire structure fits in registers, we set the + // size in memory to zero. 
+ Size = std::max<int>(Size - Excess, 0); } + /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. @@ -2818,50 +2819,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } -void -ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned InRegsParamRecordIdx, - unsigned ArgSize, - unsigned &ArgRegsSize, - unsigned &ArgRegsSaveSize) - const { - unsigned NumGPRs; - if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { - unsigned RBegin, REnd; - CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); - NumGPRs = REnd - RBegin; - } else { - unsigned int firstUnalloced; - firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs); - NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; - } - - unsigned Align = Subtarget->getFrameLowering()->getStackAlignment(); - ArgRegsSize = NumGPRs * 4; - - // If parameter is split between stack and GPRs... - if (NumGPRs && Align > 4 && - (ArgRegsSize < ArgSize || - InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { - // Add padding for part of param recovered from GPRs. For example, - // if Align == 8, its last byte must be at address K*8 - 1. - // We need to do it, since remained (stack) part of parameter has - // stack alignment, and we need to "attach" "GPRs head" without gaps - // to it: - // Stack: - // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... - // [ [padding] [GPRs head] ] [ Tail passed via stack .... - // - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned Padding = - OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align); - ArgRegsSaveSize = ArgRegsSize + Padding; - } else - // We don't need to extend regs save size for byval parameters if they - // are passed via GPRs only. 
- ArgRegsSaveSize = ArgRegsSize; -} - // The remaining GPRs hold either the beginning of variable-argument // data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data @@ -2875,13 +2832,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - unsigned ArgSize, - bool ForceMutable, - unsigned ByValStoreOffset, - unsigned TotalArgRegsSaveSize) const { - + int ArgOffset, + unsigned ArgSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; @@ -2896,82 +2848,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); - unsigned firstRegToSaveIndex, lastRegToSaveIndex; unsigned RBegin, REnd; if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); - firstRegToSaveIndex = RBegin - ARM::R0; - lastRegToSaveIndex = REnd - ARM::R0; } else { - firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs); - lastRegToSaveIndex = 4; + unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); + RBegin = RBeginIdx == 4 ? ARM::R4 : GPRArgRegs[RBeginIdx]; + REnd = ARM::R4; } - unsigned ArgRegsSize, ArgRegsSaveSize; - computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, - ArgRegsSize, ArgRegsSaveSize); + if (REnd != RBegin) + ArgOffset = -4 * (ARM::R4 - RBegin); - // Store any by-val regs to their spots on the stack so that they may be - // loaded by deferencing the result of formal parameter pointer or va_next. - // Note: once stack area for byval/varargs registers - // was initialized, it can't be initialized again. 
- if (ArgRegsSaveSize) { - unsigned Padding = ArgRegsSaveSize - ArgRegsSize; + int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); + SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); - if (Padding) { - assert(AFI->getStoredByValParamsPadding() == 0 && - "The only parameter may be padded."); - AFI->setStoredByValParamsPadding(Padding); - } + SmallVector<SDValue, 4> MemOps; + const TargetRegisterClass *RC = + AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; - int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize, - Padding + - ByValStoreOffset - - (int64_t)TotalArgRegsSaveSize, - false); - SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); - if (Padding) { - MFI->CreateFixedObject(Padding, - ArgOffset + ByValStoreOffset - - (int64_t)ArgRegsSaveSize, - false); - } - - SmallVector<SDValue, 4> MemOps; - for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; - ++firstRegToSaveIndex, ++i) { - const TargetRegisterClass *RC; - if (AFI->isThumb1OnlyFunction()) - RC = &ARM::tGPRRegClass; - else - RC = &ARM::GPRRegClass; - - unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = + for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { + unsigned VReg = MF.addLiveIn(Reg, RC); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, getPointerTy())); - } - - AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); - - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - return FrameIndex; - } else { - if (ArgSize == 0) { - // We cannot allocate a zero-byte object for the first variadic argument, - // so just make up a size. 
- ArgSize = 4; - } - // This will point to the next argument passed via stack. - return MFI->CreateFixedObject( - ArgSize, ArgOffset, !ForceMutable); + MachinePointerInfo(OrigArg, 4 * i), false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + return FrameIndex; } // Setup stack frame, the va_list pointer will start from. @@ -2989,11 +2898,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // the result of va_next. // If there is no regs to be stored, just point address after last // argument passed via stack. - int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, - CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, - 0, TotalArgRegsSaveSize); - + int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), + CCInfo.getNextStackOffset(), 4); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -3019,7 +2926,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, isVarArg)); SmallVector ArgValues; - int lastInsIndex = -1; SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -3029,50 +2935,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); - unsigned ByValStoreOffset = 0; - unsigned TotalArgRegsSaveSize = 0; - unsigned ArgRegsSaveSizeMaxAlign = 4; - // Calculate the amount of stack space that we need to allocate to store // byval and variadic arguments that are passed in registers. // We need to know this before we allocate the first byval or variadic // argument, as they will be allocated a stack slot below the CFA (Canonical // Frame Address, the stack pointer at entry to the function). 
+ unsigned ArgRegBegin = ARM::R4; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - if (VA.isMemLoc()) { - int index = VA.getValNo(); - if (index != lastInsIndex) { - ISD::ArgFlagsTy Flags = Ins[index].Flags; - if (Flags.isByVal()) { - unsigned ExtraArgRegsSize; - unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(), - Flags.getByValSize(), - ExtraArgRegsSize, ExtraArgRegsSaveSize); + if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) + break; - TotalArgRegsSaveSize += ExtraArgRegsSaveSize; - if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) - ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); - CCInfo.nextInRegsParam(); - } - lastInsIndex = index; - } - } + CCValAssign &VA = ArgLocs[i]; + unsigned Index = VA.getValNo(); + ISD::ArgFlagsTy Flags = Ins[Index].Flags; + if (!Flags.isByVal()) + continue; + + assert(VA.isMemLoc() && "unexpected byval pointer in reg"); + unsigned RBegin, REnd; + CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); + ArgRegBegin = std::min(ArgRegBegin, RBegin); + + CCInfo.nextInRegsParam(); } CCInfo.rewindByValRegsInfo(); - lastInsIndex = -1; + + int lastInsIndex = -1; if (isVarArg && MFI->hasVAStart()) { - unsigned ExtraArgRegsSize; - unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, - ExtraArgRegsSize, ExtraArgRegsSaveSize); - TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); + if (RegIdx != array_lengthof(GPRArgRegs)) + ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); } - // If the arg regs save area contains N-byte aligned values, the - // bottom of it must be at least N-byte aligned. 
- TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); - TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); + + unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); + AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -3177,18 +3073,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); - ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); - int FrameIndex = StoreByValRegs( - CCInfo, DAG, dl, Chain, CurOrigArg, - CurByValIndex, - Ins[VA.getValNo()].PartOffset, - VA.getLocMemOffset(), - Flags.getByValSize(), - true /*force mutable frames*/, - ByValStoreOffset, - TotalArgRegsSaveSize); - ByValStoreOffset += Flags.getByValSize(); - ByValStoreOffset = std::min(ByValStoreOffset, 16U); + int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, + CurByValIndex, VA.getLocMemOffset(), + Flags.getByValSize()); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 6977862fe35..f769b3d6219 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -528,12 +528,8 @@ namespace llvm { SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - unsigned ArgSize, - bool ForceMutable, - unsigned ByValStoreOffset, - unsigned TotalArgRegsSaveSize) const; + int ArgOffset, + unsigned ArgSize) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, @@ -541,12 +537,6 @@ namespace llvm { unsigned TotalArgRegsSaveSize, bool ForceMutable = false) const; - void computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned InRegsParamRecordIdx, - 
unsigned ArgSize, - unsigned &ArgRegsSize, - unsigned &ArgRegsSaveSize) const; - SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index ddfdb5240c2..a68ab1ba912 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -149,11 +149,7 @@ public: unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; } void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; } - unsigned getArgRegsSaveSize(unsigned Align = 0) const { - if (!Align) - return ArgRegsSaveSize; - return (ArgRegsSaveSize + Align - 1) & ~(Align - 1); - } + unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } unsigned getReturnRegsCount() const { return ReturnRegsCount; } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 7dcc64e15c0..e6f69dc60b2 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -94,8 +94,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { const Thumb1InstrInfo &TII = *static_cast(STI.getInstrInfo()); - unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); @@ -333,8 +332,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, const Thumb1InstrInfo &TII = *static_cast(STI.getInstrInfo()); - unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes 
>= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index b5bdc1b9dfa..b64b1bf4ccc 100644 --- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -10,7 +10,9 @@ declare void @llvm.va_end(i8*) nounwind ; CHECK-LABEL: test_byval_8_bytes_alignment: define void @test_byval_8_bytes_alignment(i32 %i, ...) { entry: -; CHECK: stm r0, {r1, r2, r3} +; CHECK: sub sp, sp, #12 +; CHECK: sub sp, sp, #4 +; CHECK: stmib sp, {r1, r2, r3} %g = alloca i8* %g1 = bitcast i8** %g to i8* call void @llvm.va_start(i8* %g1) diff --git a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll index 0028eec80f4..ff3b7e16188 100644 --- a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll +++ b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll @@ -10,8 +10,9 @@ define void @t(i32 %a, %struct.s* byval %s) nounwind { entry: ; Here we need to only check proper start address of restored %s argument. 
-; CHECK: sub sp, sp, #16 +; CHECK: sub sp, sp, #12 ; CHECK: push {r11, lr} +; CHECK: sub sp, sp, #4 ; CHECK: add r0, sp, #12 ; CHECK: stm r0, {r1, r2, r3} ; CHECK: add r0, sp, #12 diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll index c5eba7d4773..c38dd16f0d2 100644 --- a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll +++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll @@ -2,26 +2,26 @@ ;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s ;CHECK-LABEL: foo: -;CHECK: sub sp, sp, #8 -;CHECK: push {r11, lr} -;CHECK: str r0, [sp, #12] -;CHECK: add r0, sp, #12 -;CHECK: bl fooUseParam -;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #8 -;CHECK: mov pc, lr - -;CHECK-LABEL: foo2: -;CHECK: sub sp, sp, #8 +;CHECK: sub sp, sp, #16 ;CHECK: push {r11, lr} ;CHECK: str r0, [sp, #8] ;CHECK: add r0, sp, #8 -;CHECK: str r2, [sp, #12] -;CHECK: bl fooUseParam -;CHECK: add r0, sp, #12 ;CHECK: bl fooUseParam ;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #8 +;CHECK: add sp, sp, #16 +;CHECK: mov pc, lr + +;CHECK-LABEL: foo2: +;CHECK: sub sp, sp, #16 +;CHECK: push {r11, lr} +;CHECK: str r0, [sp, #8] +;CHECK: add r0, sp, #8 +;CHECK: str r2, [sp, #16] +;CHECK: bl fooUseParam +;CHECK: add r0, sp, #16 +;CHECK: bl fooUseParam +;CHECK: pop {r11, lr} +;CHECK: add sp, sp, #16 ;CHECK: mov pc, lr ;CHECK-LABEL: doFoo: diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll index e79a3ba741e..68b918d2d7d 100644 --- a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll +++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll @@ -21,11 +21,12 @@ define void @foo(double %vfp0, ; --> D0, NSAA=SP i32 %p2, ; --> R3, NSAA=SP+8 i32 %p3) #0 { ; --> SP+4, NSAA=SP+12 entry: - ;CHECK: sub sp, #8 + ;CHECK: sub sp, #12 ;CHECK: push.w {r11, lr} - ;CHECK: add r0, sp, #8 - ;CHECK: str r2, [sp, #12] - ;CHECK: str 
r1, [sp, #8] + ;CHECK: sub sp, #4 + ;CHECK: add r0, sp, #12 + ;CHECK: str r2, [sp, #16] + ;CHECK: str r1, [sp, #12] ;CHECK: bl fooUseStruct call void @fooUseStruct(%st_t* %p1) ret void diff --git a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll index 7bf03a16c6f..3c20c6b5363 100644 --- a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll +++ b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll @@ -4,7 +4,7 @@ %struct.S227 = type { [49 x i32], i32 } define void @check227( - i32 %b, + i32 %b, %struct.S227* byval nocapture %arg0, %struct.S227* %arg1) { ; b --> R0 @@ -13,14 +13,16 @@ define void @check227( entry: -;CHECK: sub sp, sp, #16 +;CHECK: sub sp, sp, #12 ;CHECK: push {r11, lr} +;CHECK: sub sp, sp, #4 ;CHECK: add r0, sp, #12 ;CHECK: stm r0, {r1, r2, r3} ;CHECK: ldr r0, [sp, #212] ;CHECK: bl useInt +;CHECK: add sp, sp, #4 ;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #16 +;CHECK: add sp, sp, #12 %0 = ptrtoint %struct.S227* %arg1 to i32 tail call void @useInt(i32 %0) diff --git a/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll index 33bfa2fa61c..5b2fc57359a 100644 --- a/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll +++ b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll @@ -13,15 +13,16 @@ declare void @usePtr(%struct8bytes8align*) ; c -> sp+0..sp+7 define void @foo1(i32 %a, %struct12bytes* byval %b, i64 %c) { ; CHECK-LABEL: foo1 -; CHECK: sub sp, sp, #16 +; CHECK: sub sp, sp, #12 ; CHECK: push {r11, lr} +; CHECK: sub sp, sp, #4 ; CHECK: add [[SCRATCH:r[0-9]+]], sp, #12 ; CHECK: stm [[SCRATCH]], {r1, r2, r3} ; CHECK: ldr r0, [sp, #24] ; CHECK: ldr r1, [sp, #28] ; CHECK: bl useLong ; CHECK: pop {r11, lr} -; CHECK: add sp, sp, #16 +; CHECK: add sp, sp, #12 call void @useLong(i64 %c) ret void diff --git a/test/CodeGen/ARM/byval-align.ll b/test/CodeGen/ARM/byval-align.ll new file mode 100644 index 00000000000..a26b5a79575 --- 
/dev/null +++ b/test/CodeGen/ARM/byval-align.ll @@ -0,0 +1,76 @@ +; RUN: llc -mtriple=thumbv7-apple-ios8.0 %s -o - | FileCheck %s + +; This checks that alignments greater than 4 are respected by APCS +; targets. Mostly here to make sure *some* correct code is created after some +; simplifying refactoring; at the time of writing there were no actual APCS +; users of byval alignments > 4, so no real calls for ABI stability. + +; "byval align 16" can't fit in any regs with an i8* taking up r0. +define i32 @test_align16(i8*, [4 x i32]* byval align 16 %b) { +; CHECK-LABEL: test_align16: +; CHECK-NOT: sub sp +; CHECK: push {r4, r7, lr} +; CHECK: add r7, sp, #4 + +; CHECK: ldr r0, [r7, #8] + + call void @bar() + %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0 + %val = load i32, i32* %valptr + ret i32 %val +} + +; byval align 8 can, but we used to incorrectly set r7 here (miscalculating the +; space taken up by arg regs). +define i32 @test_align8(i8*, [4 x i32]* byval align 8 %b) { +; CHECK-LABEL: test_align8: +; CHECK: sub sp, #8 +; CHECK: push {r4, r7, lr} +; CHECK: add r7, sp, #4 + +; CHECK-DAG: str r2, [r7, #8] +; CHECK-DAG: str r3, [r7, #12] + +; CHECK: ldr r0, [r7, #8] + + call void @bar() + %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0 + %val = load i32, i32* %valptr + ret i32 %val +} + +; "byval align 32" can't fit in regs no matter what: it would be misaligned +; unless the incoming stack was deliberately misaligned. +define i32 @test_align32(i8*, [4 x i32]* byval align 32 %b) { +; CHECK-LABEL: test_align32: +; CHECK-NOT: sub sp +; CHECK: push {r4, r7, lr} +; CHECK: add r7, sp, #4 + +; CHECK: ldr r0, [r7, #8] + + call void @bar() + %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0 + %val = load i32, i32* %valptr + ret i32 %val +} + +; When passing an object "byval align N", the stack must be at least N-aligned. 
+define void @test_call_align16() { +; CHECK-LABEL: test_call_align16: +; CHECK: push {r4, r7, lr} +; CHECK: add r7, sp, #4 + +; CHECK: mov [[TMP:r[0-9]+]], sp +; CHECK: bfc [[TMP]], #0, #4 +; CHECK: mov sp, [[TMP]] + +; While we're here, make sure the caller also puts it at sp + ; CHECK: mov r[[BASE:[0-9]+]], sp + ; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]] + call i32 @test_align16(i8* null, [4 x i32]* byval align 16 @var) + ret void +} + +@var = global [4 x i32] zeroinitializer +declare void @bar() diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll index 2608623b986..4ff3fa03d65 100644 --- a/test/CodeGen/ARM/debug-frame-vararg.ll +++ b/test/CodeGen/ARM/debug-frame-vararg.ll @@ -62,51 +62,51 @@ ; CHECK-FP-LABEL: sum ; CHECK-FP: .cfi_startproc -; CHECK-FP: sub sp, sp, #16 -; CHECK-FP: .cfi_def_cfa_offset 16 +; CHECK-FP: sub sp, sp, #12 +; CHECK-FP: .cfi_def_cfa_offset 12 ; CHECK-FP: push {r4, lr} +; CHECK-FP: .cfi_def_cfa_offset 20 +; CHECK-FP: .cfi_offset lr, -16 +; CHECK-FP: .cfi_offset r4, -20 +; CHECK-FP: sub sp, sp, #4 ; CHECK-FP: .cfi_def_cfa_offset 24 -; CHECK-FP: .cfi_offset lr, -20 -; CHECK-FP: .cfi_offset r4, -24 -; CHECK-FP: sub sp, sp, #8 -; CHECK-FP: .cfi_def_cfa_offset 32 ; CHECK-FP-ELIM-LABEL: sum ; CHECK-FP-ELIM: .cfi_startproc -; CHECK-FP-ELIM: sub sp, sp, #16 -; CHECK-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-FP-ELIM: sub sp, sp, #12 +; CHECK-FP-ELIM: .cfi_def_cfa_offset 12 ; CHECK-FP-ELIM: push {r4, r10, r11, lr} -; CHECK-FP-ELIM: .cfi_def_cfa_offset 32 -; CHECK-FP-ELIM: .cfi_offset lr, -20 -; CHECK-FP-ELIM: .cfi_offset r11, -24 -; CHECK-FP-ELIM: .cfi_offset r10, -28 -; CHECK-FP-ELIM: .cfi_offset r4, -32 +; CHECK-FP-ELIM: .cfi_def_cfa_offset 28 +; CHECK-FP-ELIM: .cfi_offset lr, -16 +; CHECK-FP-ELIM: .cfi_offset r11, -20 +; CHECK-FP-ELIM: .cfi_offset r10, -24 +; CHECK-FP-ELIM: .cfi_offset r4, -28 ; CHECK-FP-ELIM: add r11, sp, #8 -; CHECK-FP-ELIM: .cfi_def_cfa r11, 24 +; CHECK-FP-ELIM: 
.cfi_def_cfa r11, 20 ; CHECK-THUMB-FP-LABEL: sum ; CHECK-THUMB-FP: .cfi_startproc -; CHECK-THUMB-FP: sub sp, #16 -; CHECK-THUMB-FP: .cfi_def_cfa_offset 16 +; CHECK-THUMB-FP: sub sp, #12 +; CHECK-THUMB-FP: .cfi_def_cfa_offset 12 ; CHECK-THUMB-FP: push {r4, lr} +; CHECK-THUMB-FP: .cfi_def_cfa_offset 20 +; CHECK-THUMB-FP: .cfi_offset lr, -16 +; CHECK-THUMB-FP: .cfi_offset r4, -20 +; CHECK-THUMB-FP: sub sp, #4 ; CHECK-THUMB-FP: .cfi_def_cfa_offset 24 -; CHECK-THUMB-FP: .cfi_offset lr, -20 -; CHECK-THUMB-FP: .cfi_offset r4, -24 -; CHECK-THUMB-FP: sub sp, #8 -; CHECK-THUMB-FP: .cfi_def_cfa_offset 32 ; CHECK-THUMB-FP-ELIM-LABEL: sum ; CHECK-THUMB-FP-ELIM: .cfi_startproc -; CHECK-THUMB-FP-ELIM: sub sp, #16 -; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16 +; CHECK-THUMB-FP-ELIM: sub sp, #12 +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 12 ; CHECK-THUMB-FP-ELIM: push {r4, r6, r7, lr} -; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32 -; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20 -; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24 -; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -28 -; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32 +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 28 +; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -16 +; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -20 +; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -24 +; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -28 ; CHECK-THUMB-FP-ELIM: add r7, sp, #8 -; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24 +; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 20 define i32 @sum(i32 %count, ...) 
{ entry: diff --git a/test/CodeGen/ARM/ssp-data-layout.ll b/test/CodeGen/ARM/ssp-data-layout.ll index 516cc2bdc27..d08e7de57d9 100644 --- a/test/CodeGen/ARM/ssp-data-layout.ll +++ b/test/CodeGen/ARM/ssp-data-layout.ll @@ -161,7 +161,7 @@ entry: %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0 %7 = bitcast [2 x i16]* %coerce.dive26 to i32* %8 = load i32, i32* %7, align 1 - call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) ret void } @@ -308,7 +308,7 @@ entry: %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0 %7 = bitcast [2 x i16]* %coerce.dive26 to i32* %8 = load i32, i32* %7, align 1 - call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) ret void } @@ -443,7 +443,7 @@ entry: %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0 %7 = bitcast [2 x i16]* %coerce.dive26 to i32* %8 = load i32, i32* %7, align 1 - call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, 
i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) ret void } @@ -482,7 +482,7 @@ entry: %coerce.dive5 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d2, i32 0, i32 0 %5 = bitcast [2 x i16]* %coerce.dive5 to i32* %6 = load i32, i32* %5, align 1 - call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0) + call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 4 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0) ret void } diff --git a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll index 148a79df0cb..4879d73894d 100644 --- a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll +++ b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll @@ -21,10 +21,10 @@ define void @varargs_func(i32 %arg1, ...) { ; CHECK-LABEL: varargs_func: ; Reserve space for the varargs save area. This currently reserves ; more than enough (16 bytes rather than the 12 bytes needed). -; CHECK: sub sp, sp, #16 +; CHECK: sub sp, sp, #12 ; CHECK: push {r11, lr} ; Align the stack pointer to a multiple of 16. -; CHECK: sub sp, sp, #8 +; CHECK: sub sp, sp, #12 ; Calculate the address of the varargs save area and save varargs ; arguments into it. ; CHECK-NEXT: add r0, sp, #20 diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll index d19abd5ef80..6dc71352214 100644 --- a/test/CodeGen/ARM/vargs_align.ll +++ b/test/CodeGen/ARM/vargs_align.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI +; RUN: llc < %s -mtriple=armv7-linux-gnueabihf | FileCheck %s -check-prefix=EABI ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI define i32 @f(i32 %a, ...) 
{ @@ -11,13 +11,17 @@ entry: %tmp1 = load i32, i32* %tmp ; [#uses=1] store i32 %tmp1, i32* %retval call void @llvm.va_start(i8* null) + call void asm sideeffect "", "~{d8}"() br label %return return: ; preds = %entry %retval2 = load i32, i32* %retval ; [#uses=1] ret i32 %retval2 -; EABI: add sp, sp, #12 ; EABI: add sp, sp, #16 +; EABI: vpop {d8} +; EABI: add sp, sp, #4 +; EABI: add sp, sp, #12 + ; OABI: add sp, sp, #12 ; OABI: add sp, sp, #12 }