[ARM] Refactor the prologue/epilogue emission to be more robust.

This is the first step toward supporting shrink-wrapping for this target.

The changes can be summarized as follows:
- Expand the tail-call return as part of the expand pseudo pass.
- Get rid of the assumption that the epilogue is the exit block:
  * Do not assume which registers are free in the epilogue. (This indirectly
    improves the lowering of the code for segmented stacks; see the test
    cases.)
  * Take into account that the basic block can be empty (a minimal sketch of
    the resulting pattern follows below).
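
Illustration only (not part of the original commit message): a minimal,
standalone C++ sketch of the pattern the epilogue code moves to, with plain
containers standing in for MachineBasicBlock. The point is that the insertion
point is "the first terminator, if any", not "the return at the end".

#include <iostream>
#include <string>
#include <vector>

struct Inst {
  std::string Text;
  bool IsTerminator;
};

int main() {
  // A block that restores registers and returns; it could just as well be
  // empty or end without a return at all.
  std::vector<Inst> Block = {{"pop {r4, r7}", false}, {"bx lr", true}};

  // Find the first terminator; fall back to the end if there is none.
  auto InsertPt = Block.begin();
  while (InsertPt != Block.end() && !InsertPt->IsTerminator)
    ++InsertPt;

  // Emit the SP adjustment right before the terminator (or at the block end).
  Block.insert(InsertPt, {"add sp, #8", false});

  for (const Inst &I : Block)
    std::cout << I.Text << "\n";
  return 0;
}

The real code does the same thing with MBB.getFirstTerminator() and guards the
DebugLoc with MBBI != MBB.end().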

Related to <rdar://problem/20821730>


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242714 91177308-0d34-0410-b5e6-96231b3b80d8
Quentin Colombet 2015-07-20 21:42:14 +00:00
parent c9f86c1260
commit b2dab382ce
8 changed files with 258 additions and 152 deletions


@ -747,6 +747,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
switch (Opcode) {
default:
return false;
case ARM::TCRETURNdi:
case ARM::TCRETURNri: {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->isReturn() &&
"Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
MBB.getParent()->getSubtarget().getInstrInfo());
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
unsigned TCOpcode =
STI->isThumb()
? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
: ARM::TAILJMPd;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
else {
assert(JumpTarget.isSymbol());
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
// Add the default predicate in Thumb mode.
if (STI->isThumb())
MIB.addImm(ARMCC::AL).addReg(0);
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl,
TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
.addReg(JumpTarget.getReg(), RegState::Kill);
}
MachineInstr *NewMI = std::prev(MBBI);
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
NewMI->addOperand(MBBI->getOperand(i));
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
MBBI = NewMI;
return true;
}
case ARM::VMOVScc:
case ARM::VMOVDcc: {
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
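
Illustration only: the opcode choice made by the TCRETURN expansion above,
restated as a standalone function. The booleans stand in for the
STI->isThumb() / STI->isTargetMachO() queries and for the register form
(TCRETURNri vs. TCRETURNdi); the strings are simply the opcode names.

#include <iostream>
#include <string>

static std::string tailJumpOpcode(bool IsThumb, bool IsMachO,
                                  bool IsRegisterForm) {
  if (IsRegisterForm) // TCRETURNri
    return IsThumb ? "tTAILJMPr" : "TAILJMPr";
  if (!IsThumb)       // TCRETURNdi in ARM mode
    return "TAILJMPd";
  // TCRETURNdi in Thumb mode: MachO targets get the direct form, the others
  // the non-duplicable variant.
  return IsMachO ? "tTAILJMPd" : "tTAILJMPdND";
}

int main() {
  std::cout << tailJumpOpcode(true, true, false) << "\n";  // tTAILJMPd
  std::cout << tailJumpOpcode(false, false, true) << "\n"; // TAILJMPr
  return 0;
}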


@ -689,60 +689,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
// Resolve TCReturn pseudo-instruction
void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
const ARMBaseInstrInfo &TII =
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
return;
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
unsigned TCOpcode = STI.isThumb() ?
(STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
ARM::TAILJMPd;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
else {
assert(JumpTarget.isSymbol());
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
// Add the default predicate in Thumb mode.
if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
}
MachineInstr *NewMI = std::prev(MBBI);
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
NewMI->addOperand(MBBI->getOperand(i));
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
MBBI = NewMI;
}
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@ -758,10 +706,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
// First put ourselves on the first (from top) terminator instruction.
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
@ -840,8 +790,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
fixTCReturn(MF, MBB);
if (ArgRegsSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
@ -1008,7 +956,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
// Put any subsequent vpush instructions before this one: they will refer to
// higher register numbers so need to be pushed first in order to preserve
// monotonicity.
if (MI != MBB.begin())
--MI;
}
}
@ -1022,12 +971,16 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
bool isTailCall = false;
bool isInterrupt = false;
if (MBB.end() != MI) {
DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
isInterrupt =
RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
}
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@ -1044,10 +997,13 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
STI.hasV5TOps()) {
if (MBB.succ_empty()) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
} else
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
}
// If NoGap is true, pop consecutive registers and then leave the rest
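
Illustration only: the decision above in isolation. The real code also
requires that this is not a tail call, not a vararg or interrupt return, and
that the subtarget has ARMv5T ops; those conditions are elided here.

#include <iostream>
#include <string>

// Pop the saved LR directly into PC only when the block really leaves the
// function, i.e. it has no successors.
static std::string popOpcodeForLR(bool BlockHasSuccessors, bool IsThumb2) {
  if (!BlockHasSuccessors) // fold the return into the load-multiple
    return IsThumb2 ? "t2LDMIA_RET" : "LDMIA_RET";
  return IsThumb2 ? "t2LDMIA_UPD" : "LDMIA_UPD"; // plain reload, keep the flow
}

int main() {
  std::cout << popOpcodeForLR(false, false) << "\n"; // LDMIA_RET
  std::cout << popOpcodeForLR(true, false) << "\n";  // LDMIA_UPD
  return 0;
}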
@ -1068,7 +1024,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet && MI != MBB.end()) {
MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
@ -1095,7 +1051,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
// Put any subsequent vpop instructions after this one: they will refer to
// higher register numbers so need to be popped afterwards.
if (MI != MBB.end())
++MI;
}
}
@ -1913,21 +1870,51 @@ void ARMFrameLowering::adjustForSegmentedStacks(
MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
// Grab everything that reaches PrologueMBB to update their liveness as well.
SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
SmallVector<MachineBasicBlock *, 2> WalkList;
WalkList.push_back(&PrologueMBB);
do {
MachineBasicBlock *CurMBB = WalkList.pop_back_val();
for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
if (BeforePrologueRegion.insert(PredBB).second)
WalkList.push_back(PredBB);
}
} while (!WalkList.empty());
// The order in that list is important.
// The blocks will all be inserted before PrologueMBB using that order.
// Therefore the block that should appear first in the CFG should appear
// first in the list.
MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
PostStackMBB};
const int NbAddedBlocks = sizeof(AddedBlocks) / sizeof(AddedBlocks[0]);
for (int Idx = 0; Idx < NbAddedBlocks; ++Idx)
BeforePrologueRegion.insert(AddedBlocks[Idx]);
for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
e = PrologueMBB.livein_end();
i != e; ++i) {
for (MachineBasicBlock *PredBB : BeforePrologueRegion)
PredBB->addLiveIn(*i);
}
// Remove the newly added blocks from the list, since we know
// we do not have to do the following updates for them.
for (int Idx = 0; Idx < NbAddedBlocks; ++Idx) {
BeforePrologueRegion.erase(AddedBlocks[Idx]);
MF.insert(&PrologueMBB, AddedBlocks[Idx]);
}
for (MachineBasicBlock *MBB : BeforePrologueRegion) {
// Make sure the LiveIns are still sorted and unique.
MBB->sortUniqueLiveIns();
// Replace the edges to PrologueMBB by edges to the sequences
// we are about to add.
MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
}
// The required stack size that is aligned to ARM constant criterion.
AlignedStackSize = alignToARMConstant(StackSize);
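
Illustration only: a standalone sketch of the predecessor walk used above to
collect every block that executes before the prologue, so their live-ins can
be updated. Plain pointers, std::set and a worklist stand in for
MachineBasicBlock, SmallPtrSet and SmallVector.

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  std::vector<Block *> Preds;
};

static std::set<Block *> regionBefore(Block *Prologue) {
  std::set<Block *> Region;
  std::vector<Block *> Worklist{Prologue};
  while (!Worklist.empty()) {
    Block *Cur = Worklist.back();
    Worklist.pop_back();
    for (Block *Pred : Cur->Preds)
      if (Region.insert(Pred).second) // not seen before
        Worklist.push_back(Pred);
  }
  return Region; // contains the predecessors, not the prologue itself
}

int main() {
  Block Entry{"entry", {}};
  Block Check{"check", {&Entry}};
  Block Prologue{"prologue", {&Check, &Entry}};
  for (Block *B : regionBefore(&Prologue))
    std::cout << B->Name << "\n"; // entry and check, in unspecified order
  return 0;
}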


@ -31,8 +31,6 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,


@ -13,6 +13,7 @@
#include "Thumb1FrameLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ThumbRegisterInfo *RegInfo =
@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
} else {
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
@ -409,43 +406,112 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
// MOV r3, ip
// BX lr
if (ArgRegsSaveSize || IsV4PopReturn) {
// If MBBI is a return instruction, we may be able to directly restore
// LR in the PC.
// This is possible if we do not need to emit any SP update.
// Otherwise, we need a temporary register to pop the value
// and copy that value into LR.
MBBI = MBB.getFirstTerminator();
if (!ArgRegsSaveSize && MBBI != MBB.end() &&
MBBI->getOpcode() == ARM::tBX_RET) {
MachineInstrBuilder MIB =
AddDefaultPred(
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)))
.addReg(ARM::PC, RegState::Define);
MIB.copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
return;
}
// Look for a temporary register to use.
// First, compute the liveness information.
LivePhysRegs UsedRegs(STI.getRegisterInfo());
UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
// The semantics of pristine registers changed recently: callee-saved
// registers that are touched in the function are no longer part of the
// pristine set.
// Add those callee-saved registers now.
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i)
UsedRegs.addReg(CSRegs[i]);
DebugLoc dl = DebugLoc();
if (MBBI != MBB.end()) {
dl = MBBI->getDebugLoc();
auto InstUpToMBBI = MBB.end();
// The post-decrement is on purpose here.
// We want to have the liveness right before MBBI.
while (InstUpToMBBI-- != MBBI)
UsedRegs.stepBackward(*InstUpToMBBI);
}
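// At this point UsedRegs describes what is live right before MBBI: the
// live-outs of the block (plus the callee-saved registers added above),
// stepped backwards over every instruction from the block end down to MBBI.
// Any register not contained in it is free to receive the popped LR value.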
// Look for a register that can be directly used in the POP.
unsigned PopReg = 0;
// And some temporary register, just in case.
unsigned TemporaryReg = 0;
BitVector PopFriendly =
TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
// Rebuild the GPRs from the high registers because they are removed
// from the GPR register class for Thumb1.
BitVector GPRsNoLRSP =
TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
GPRsNoLRSP |= PopFriendly;
GPRsNoLRSP.reset(ARM::LR);
GPRsNoLRSP.reset(ARM::SP);
GPRsNoLRSP.reset(ARM::PC);
for (int Register = GPRsNoLRSP.find_first(); Register != -1;
Register = GPRsNoLRSP.find_next(Register)) {
if (!UsedRegs.contains(Register)) {
// Remember the first pop-friendly register and exit.
if (PopFriendly.test(Register)) {
PopReg = Register;
TemporaryReg = 0;
break;
}
// Otherwise, remember that the register will be available to
// save a pop-friendly register.
TemporaryReg = Register;
}
}
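// After this scan, either PopReg names a free low register that tPOP can
// write directly, or PopReg is still 0 and TemporaryReg names a free high
// register that is used below to free up a low register for the pop.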
assert((PopReg || TemporaryReg) && "Cannot get LR");
if (TemporaryReg) {
assert(!PopReg && "Unnecessary MOV is about to be inserted");
PopReg = PopFriendly.find_first();
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(TemporaryReg, RegState::Define)
.addReg(PopReg, RegState::Kill));
}
assert(PopReg && "Do not know how to get LR");
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
.addReg(PopReg, RegState::Define);
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
if (!TemporaryReg && MBBI != MBB.end() &&
MBBI->getOpcode() == ARM::tBX_RET) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
.addReg(PopReg, RegState::Kill);
AddDefaultPred(MIB);
MIB.copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
return;
}
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(ARM::LR, RegState::Define)
.addReg(PopReg, RegState::Kill));
if (TemporaryReg) {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(PopReg, RegState::Define)
.addReg(TemporaryReg, RegState::Kill));
}
}
}
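
Illustration only: the three shapes the restore sequence built above can take,
spelled out as text. The register names (r3, r12) and the immediate N are
examples; the real code uses whichever registers the liveness scan found free,
and the fast path additionally requires that the terminator is a tBX_RET. The
updated FileCheck patterns in the tests below match these sequences.

#include <iostream>
#include <string>
#include <vector>

// The LR-restore sequence, depending on whether an SP update is needed and
// whether a low (pop-friendly) register is free at the insertion point.
static std::vector<std::string> restoreLR(bool NeedsSPUpdate, bool LowRegFree) {
  if (!NeedsSPUpdate)
    return {"pop {pc}"}; // fold the LR restore into the return
  if (LowRegFree)        // e.g. r3 is free
    return {"pop {r3}", "add sp, #N", "bx r3"};
  // Only a high register (here r12) is free: park r3 there, reuse r3 for the
  // pop, move the value into LR, restore r3, and return through the existing
  // bx lr.
  return {"mov r12, r3", "pop {r3}", "add sp, #N",
          "mov lr, r3",  "mov r3, r12", "bx lr"};
}

int main() {
  for (const std::string &I : restoreLR(/*NeedsSPUpdate=*/true,
                                        /*LowRegFree=*/false))
    std::cout << I << "\n";
  return 0;
}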
@ -508,7 +574,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
bool NumRegs = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR && MBB.succ_empty()) {
// Special epilogue for vararg functions. See emitEpilogue
if (isVarArg)
continue;
@ -517,7 +583,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
if (MI != MBB.end())
MIB.copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));


@ -170,9 +170,9 @@ define void @test_varsize(...) minsize {
; CHECK-T1: push {r5, r6, r7, lr}
; ...
; CHECK-T1: pop {r2, r3, r7}
; CHECK-T1: pop {[[POP_REG:r[0-3]]]}
; CHECK-T1: add sp, #16
; CHECK-T1: bx [[POP_REG]]
; CHECK-LABEL: test_varsize:
; CHECK: sub sp, #16


@ -23,11 +23,9 @@ entry:
; --------
; CHECK-V4T: add sp,
; CHECK-V4T-NEXT: pop {[[SAVED]]}
; We do not have any SP update to insert so we can just optimize
; the pop sequence.
; CHECK-V4T-NEXT: pop {pc}
; CHECK-V5T: pop {[[SAVED]], pc}
}
@ -53,19 +51,19 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
; CHECK-V4T-NEXT: mov r12, [[POP_REG:r[0-7]]]
; CHECK-V4T-NEXT: pop {[[POP_REG]]}
; CHECK-V4T-NEXT: add sp,
; CHECK-V4T-NEXT: mov lr, [[POP_REG]]
; CHECK-V4T-NEXT: mov [[POP_REG]], r12
; CHECK-V4T: bx lr
; CHECK-V5T: add sp,
; CHECK-V5T-NEXT: pop {[[SAVED]]}
; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]]
; CHECK-V5T-NEXT: pop {[[POP_REG]]}
; CHECK-V5T-NEXT: add sp,
; CHECK-V5T-NEXT: mov lr, [[POP_REG]]
; CHECK-V5T-NEXT: mov [[POP_REG]], r12
; CHECK-V5T-NEXT: bx lr
}
@ -95,8 +93,7 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
; CHECK-V4T: pop {pc}
; CHECK-V5T: pop {[[SAVED]], pc}
}
@ -148,14 +145,18 @@ entry:
; --------
; CHECK-V4T: add sp,
; CHECK-V4T-NEXT: pop {[[SAVED]]}
; Only r1 to r3 are available to pop LR.
; r0 is used for the return value.
; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V4T-NEXT: add sp,
; CHECK-V4T-NEXT: bx [[POP_REG]]
; CHECK-V5T: add sp,
; CHECK-V5T-NEXT: pop {[[SAVED]]}
; Only r1 to r3 are available to pop LR.
; r0 is used for the return value.
; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V5T-NEXT: add sp,
; CHECK-V5T-NEXT: bx [[POP_REG]]
}
; CHECK-V4T-LABEL: noframe
@ -191,13 +192,17 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
; Only r1 to r3 are available to pop LR.
; r0 is used for the return value.
; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V4T-NEXT: add sp,
; CHECK-V4T-NEXT: bx [[POP_REG]]
; CHECK-V5T: pop {[[SAVED]]}
; Only r1 to r3 are available to pop LR.
; r0 is used for the return value.
; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V5T-NEXT: add sp,
; CHECK-V5T-NEXT: bx [[POP_REG]]
}
declare void @llvm.va_start(i8*) nounwind


@ -3,9 +3,9 @@
define void @t(i8* %a, ...) nounwind {
; CHECK-LABEL: t:
; CHECK: pop {[[POP_REG:r[0-3]]]}
; CHECK-NEXT: add sp, #12
; CHECK-NEXT: bx [[POP_REG]]
entry:
%a.addr = alloca i8, i32 4
call void @llvm.va_start(i8* %a.addr)


@ -32,12 +32,12 @@ bb7: ; preds = %bb
call void @llvm.va_end( i8* %va.upgrd.4 )
ret void
; The return sequence should pop LR into one of r0-r3, recover the stack space
; used to store variadic argument registers, then return via the popped
; register. Possibly there is a pop before this, but only if the function
; happened to use callee-saved registers.
; CHECK: pop {[[POP_REG:r[0-3]]]}
; CHECK: add sp, #[[IMM]]
; CHECK: bx [[POP_REG]]
}
declare void @llvm.va_start(i8*)