diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 4438f50758d..f090e5abd95 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -747,6 +747,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, switch (Opcode) { default: return false; + + case ARM::TCRETURNdi: + case ARM::TCRETURNri: { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->isReturn() && + "Can only insert epilog into returning blocks"); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc dl = MBBI->getDebugLoc(); + const ARMBaseInstrInfo &TII = *static_cast( + MBB.getParent()->getSubtarget().getInstrInfo()); + + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + + // Jump to label or value in register. + if (RetOpcode == ARM::TCRETURNdi) { + unsigned TCOpcode = + STI->isThumb() + ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) + : ARM::TAILJMPd; + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); + if (JumpTarget.isGlobal()) + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + else { + assert(JumpTarget.isSymbol()); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + + // Add the default predicate in Thumb mode. + if (STI->isThumb()) + MIB.addImm(ARMCC::AL).addReg(0); + } else if (RetOpcode == ARM::TCRETURNri) { + BuildMI(MBB, MBBI, dl, + TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)) + .addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = std::prev(MBBI); + for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + MBBI = NewMI; + return true; + } case ARM::VMOVScc: case ARM::VMOVDcc: { unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 6744000afe2..7b2369463e8 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -689,60 +689,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, AFI->setShouldRestoreSPFromFP(true); } -// Resolve TCReturn pseudo-instruction -void ARMFrameLowering::fixTCReturn(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc dl = MBBI->getDebugLoc(); - const ARMBaseInstrInfo &TII = - *static_cast(MF.getSubtarget().getInstrInfo()); - - if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri)) - return; - - // Tail call return: adjust the stack pointer and jump to callee. - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - - // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi) { - unsigned TCOpcode = STI.isThumb() ? - (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : - ARM::TAILJMPd; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); - if (JumpTarget.isGlobal()) - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - else { - assert(JumpTarget.isSymbol()); - MIB.addExternalSymbol(JumpTarget.getSymbolName(), - JumpTarget.getTargetFlags()); - } - - // Add the default predicate in Thumb mode. - if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); - } else if (RetOpcode == ARM::TCRETURNri) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). - addReg(JumpTarget.getReg(), RegState::Kill); - } - - MachineInstr *NewMI = std::prev(MBBI); - for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) - NewMI->addOperand(MBBI->getOperand(i)); - - // Delete the pseudo instruction TCRETURN. - MBB.erase(MBBI); - MBBI = NewMI; -} - void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); @@ -758,10 +706,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) { - fixTCReturn(MF, MBB); + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - } + + // First put ourselves on the first (from top) terminator instructions. + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) @@ -840,8 +790,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } - fixTCReturn(MF, MBB); - if (ArgRegsSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); } @@ -1008,7 +956,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, // Put any subsequent vpush instructions before this one: they will refer to // higher register numbers so need to be pushed first in order to preserve // monotonicity. - --MI; + if (MI != MBB.begin()) + --MI; } } @@ -1022,12 +971,16 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo(); - DebugLoc DL = MI->getDebugLoc(); - unsigned RetOpcode = MI->getOpcode(); - bool isTailCall = (RetOpcode == ARM::TCRETURNdi || - RetOpcode == ARM::TCRETURNri); - bool isInterrupt = - RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; + DebugLoc DL; + bool isTailCall = false; + bool isInterrupt = false; + if (MBB.end() != MI) { + DL = MI->getDebugLoc(); + unsigned RetOpcode = MI->getOpcode(); + isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri); + isInterrupt = + RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; + } SmallVector Regs; unsigned i = CSI.size(); @@ -1044,10 +997,13 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && STI.hasV5TOps()) { - Reg = ARM::PC; - LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; + if (MBB.succ_empty()) { + Reg = ARM::PC; + DeleteRet = true; + LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; + } else + LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; // Fold the return instruction into the LDM. - DeleteRet = true; } // If NoGap is true, pop consecutive registers and then leave the rest @@ -1068,7 +1024,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, .addReg(ARM::SP)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); - if (DeleteRet) { + if (DeleteRet && MI != MBB.end()) { MIB.copyImplicitOps(&*MI); MI->eraseFromParent(); } @@ -1095,7 +1051,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, // Put any subsequent vpop instructions after this one: they will refer to // higher register numbers so need to be popped afterwards. - ++MI; + if (MI != MBB.end()) + ++MI; } } @@ -1913,21 +1870,51 @@ void ARMFrameLowering::adjustForSegmentedStacks( MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock(); + // Grab everything that reaches PrologueMBB to update there liveness as well. + SmallPtrSet BeforePrologueRegion; + SmallVector WalkList; + WalkList.push_back(&PrologueMBB); + + do { + MachineBasicBlock *CurMBB = WalkList.pop_back_val(); + for (MachineBasicBlock *PredBB : CurMBB->predecessors()) { + if (BeforePrologueRegion.insert(PredBB).second) + WalkList.push_back(PredBB); + } + } while (!WalkList.empty()); + + // The order in that list is important. + // The blocks will all be inserted before PrologueMBB using that order. + // Therefore the block that should appear first in the CFG should appear + // first in the list. + MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB, + PostStackMBB}; + const int NbAddedBlocks = sizeof(AddedBlocks) / sizeof(AddedBlocks[0]); + + for (int Idx = 0; Idx < NbAddedBlocks; ++Idx) + BeforePrologueRegion.insert(AddedBlocks[Idx]); + for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(), e = PrologueMBB.livein_end(); i != e; ++i) { - AllocMBB->addLiveIn(*i); - GetMBB->addLiveIn(*i); - McrMBB->addLiveIn(*i); - PrevStackMBB->addLiveIn(*i); - PostStackMBB->addLiveIn(*i); + for (MachineBasicBlock *PredBB : BeforePrologueRegion) + PredBB->addLiveIn(*i); } - MF.push_front(PostStackMBB); - MF.push_front(AllocMBB); - MF.push_front(GetMBB); - MF.push_front(McrMBB); - MF.push_front(PrevStackMBB); + // Remove the newly added blocks from the list, since we know + // we do not have to do the following updates for them. + for (int Idx = 0; Idx < NbAddedBlocks; ++Idx) { + BeforePrologueRegion.erase(AddedBlocks[Idx]); + MF.insert(&PrologueMBB, AddedBlocks[Idx]); + } + + for (MachineBasicBlock *MBB : BeforePrologueRegion) { + // Make sure the LiveIns are still sorted and unique. + MBB->sortUniqueLiveIns(); + // Replace the edges to PrologueMBB by edges to the sequences + // we are about to add. + MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); + } // The required stack size that is aligned to ARM constant criterion. AlignedStackSize = alignToARMConstant(StackSize); diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 6fdc5eff5e4..aabdd8dbe8f 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -31,8 +31,6 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 3b4358b5d9b..79883c100fa 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -13,6 +13,7 @@ #include "Thumb1FrameLowering.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert((MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && - "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); const ThumbRegisterInfo *RegInfo = @@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - std::prev(MBBI)->getOpcode() == ARM::tPOP) { + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); @@ -409,43 +406,112 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // MOV r3, ip // BX lr if (ArgRegsSaveSize || IsV4PopReturn) { - // Get the last instruction, tBX_RET - MBBI = MBB.getLastNonDebugInstr(); - assert (MBBI->getOpcode() == ARM::tBX_RET); - DebugLoc dl = MBBI->getDebugLoc(); - - if (AFI->getReturnRegsCount() <= 3) { - // Epilogue: pop saved LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - + // If MBBI is a return instruction, we may be able to directly restore + // LR in the PC. + // This is possible if we do not need to emit any SP update. + // Otherwise, we need a temporary register to pop the value + // and copy that value into LR. + MBBI = MBB.getFirstTerminator(); + if (!ArgRegsSaveSize && MBBI != MBB.end() && + MBBI->getOpcode() == ARM::tBX_RET) { MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) - .addReg(ARM::R3, RegState::Kill); + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))) + .addReg(ARM::PC, RegState::Define); + MIB.copyImplicitOps(&*MBBI); + // erase the old tBX_RET instruction + MBB.erase(MBBI); + return; + } + + // Look for a temporary register to use. + // First, compute the liveness information. + LivePhysRegs UsedRegs(STI.getRegisterInfo()); + UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true); + // The semantic of pristines changed recently and now, + // the callee-saved registers that are touched in the function + // are not part of the pristines set anymore. + // Add those callee-saved now. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) + UsedRegs.addReg(CSRegs[i]); + + DebugLoc dl = DebugLoc(); + if (MBBI != MBB.end()) { + dl = MBBI->getDebugLoc(); + auto InstUpToMBBI = MBB.end(); + // The post-decrement is on purpose here. + // We want to have the liveness right before MBBI. + while (InstUpToMBBI-- != MBBI) + UsedRegs.stepBackward(*InstUpToMBBI); + } + + // Look for a register that can be directly use in the POP. + unsigned PopReg = 0; + // And some temporary register, just in case. + unsigned TemporaryReg = 0; + BitVector PopFriendly = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); + // Rebuild the GPRs from the high registers because they are removed + // form the GPR reg class for thumb1. + BitVector GPRsNoLRSP = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + GPRsNoLRSP |= PopFriendly; + GPRsNoLRSP.reset(ARM::LR); + GPRsNoLRSP.reset(ARM::SP); + GPRsNoLRSP.reset(ARM::PC); + for (int Register = GPRsNoLRSP.find_first(); Register != -1; + Register = GPRsNoLRSP.find_next(Register)) { + if (!UsedRegs.contains(Register)) { + // Remember the first pop-friendly register and exit. + if (PopFriendly.test(Register)) { + PopReg = Register; + TemporaryReg = 0; + break; + } + // Otherwise, remember that the register will be available to + // save a pop-friendly register. + TemporaryReg = Register; + } + } + + assert((PopReg || TemporaryReg) && "Cannot get LR"); + + if (TemporaryReg) { + assert(!PopReg && "Unnecessary MOV is about to be inserted"); + PopReg = PopFriendly.find_first(); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(TemporaryReg, RegState::Define) + .addReg(PopReg, RegState::Kill)); + } + + assert(PopReg && "Do not know how to get LR"); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(PopReg, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + if (!TemporaryReg && MBBI != MBB.end() && + MBBI->getOpcode() == ARM::tBX_RET) { + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(PopReg, RegState::Kill); AddDefaultPred(MIB); MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); - } else { + return; + } + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + + if (TemporaryReg) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::R12, RegState::Define) - .addReg(ARM::R3, RegState::Kill)); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(ARM::R3, RegState::Kill)); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::R3, RegState::Define) - .addReg(ARM::R12, RegState::Kill)); - // Keep the tBX_RET instruction + .addReg(PopReg, RegState::Define) + .addReg(TemporaryReg, RegState::Kill)); } } } @@ -508,7 +574,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool NumRegs = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (Reg == ARM::LR) { + if (Reg == ARM::LR && MBB.succ_empty()) { // Special epilogue for vararg functions. See emitEpilogue if (isVarArg) continue; @@ -517,7 +583,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - MIB.copyImplicitOps(&*MI); + if (MI != MBB.end()) + MIB.copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll index 1fd9bd9e47a..b62d0dfee07 100644 --- a/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/test/CodeGen/ARM/fold-stack-adjust.ll @@ -170,9 +170,9 @@ define void @test_varsize(...) minsize { ; CHECK-T1: push {r5, r6, r7, lr} ; ... ; CHECK-T1: pop {r2, r3, r7} -; CHECK-T1: pop {r3} +; CHECK-T1: pop {[[POP_REG:r[0-3]]]} ; CHECK-T1: add sp, #16 -; CHECK-T1: bx r3 +; CHECK-T1: bx [[POP_REG]] ; CHECK-LABEL: test_varsize: ; CHECK: sub sp, #16 diff --git a/test/CodeGen/ARM/thumb1_return_sequence.ll b/test/CodeGen/ARM/thumb1_return_sequence.ll index 9c62faeaa68..5b9c19ab5eb 100644 --- a/test/CodeGen/ARM/thumb1_return_sequence.ll +++ b/test/CodeGen/ARM/thumb1_return_sequence.ll @@ -23,11 +23,9 @@ entry: ; -------- ; CHECK-V4T: add sp, ; CHECK-V4T-NEXT: pop {[[SAVED]]} -; CHECK-V4T-NEXT: mov r12, r3 -; CHECK-V4T-NEXT: pop {r3} -; CHECK-V4T-NEXT: mov lr, r3 -; CHECK-V4T-NEXT: mov r3, r12 -; CHECK-V4T: bx lr +; We do not have any SP update to insert so we can just optimize +; the pop sequence. +; CHECK-V4T-NEXT: pop {pc} ; CHECK-V5T: pop {[[SAVED]], pc} } @@ -53,19 +51,19 @@ entry: ; Epilogue ; -------- ; CHECK-V4T: pop {[[SAVED]]} -; CHECK-V4T-NEXT: mov r12, r3 -; CHECK-V4T-NEXT: pop {r3} +; CHECK-V4T-NEXT: mov r12, [[POP_REG:r[0-7]]] +; CHECK-V4T-NEXT: pop {[[POP_REG]]} ; CHECK-V4T-NEXT: add sp, -; CHECK-V4T-NEXT: mov lr, r3 -; CHECK-V4T-NEXT: mov r3, r12 +; CHECK-V4T-NEXT: mov lr, [[POP_REG]] +; CHECK-V4T-NEXT: mov [[POP_REG]], r12 ; CHECK-V4T: bx lr ; CHECK-V5T: add sp, ; CHECK-V5T-NEXT: pop {[[SAVED]]} -; CHECK-V5T-NEXT: mov r12, r3 -; CHECK-V5T-NEXT: pop {r3} +; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]] +; CHECK-V5T-NEXT: pop {[[POP_REG]]} ; CHECK-V5T-NEXT: add sp, -; CHECK-V5T-NEXT: mov lr, r3 -; CHECK-V5T-NEXT: mov r3, r12 +; CHECK-V5T-NEXT: mov lr, [[POP_REG]] +; CHECK-V5T-NEXT: mov [[POP_REG]], r12 ; CHECK-V5T-NEXT: bx lr } @@ -95,8 +93,7 @@ entry: ; Epilogue ; -------- ; CHECK-V4T: pop {[[SAVED]]} -; CHECK-V4T: pop {r3} -; CHECK-V4T: bx r3 +; CHECK-V4T: pop {pc} ; CHECK-V5T: pop {[[SAVED]], pc} } @@ -148,14 +145,18 @@ entry: ; -------- ; CHECK-V4T: add sp, ; CHECK-V4T-NEXT: pop {[[SAVED]]} -; CHECK-V4T-NEXT: pop {r3} +; Only r1 to r3 are available to pop LR. +; r0 is used for the return value. +; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]} ; CHECK-V4T-NEXT: add sp, -; CHECK-V4T-NEXT: bx r3 +; CHECK-V4T-NEXT: bx [[POP_REG]] ; CHECK-V5T: add sp, ; CHECK-V5T-NEXT: pop {[[SAVED]]} -; CHECK-V5T-NEXT: pop {r3} +; Only r1 to r3 are available to pop LR. +; r0 is used for the return value. +; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]} ; CHECK-V5T-NEXT: add sp, -; CHECK-V5T-NEXT: bx r3 +; CHECK-V5T-NEXT: bx [[POP_REG]] } ; CHECK-V4T-LABEL: noframe @@ -191,13 +192,17 @@ entry: ; Epilogue ; -------- ; CHECK-V4T: pop {[[SAVED]]} -; CHECK-V4T-NEXT: pop {r3} +; Only r1 to r3 are available to pop LR. +; r0 is used for the return value. +; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]} ; CHECK-V4T-NEXT: add sp, -; CHECK-V4T-NEXT: bx r3 +; CHECK-V4T-NEXT: bx [[POP_REG]] ; CHECK-V5T: pop {[[SAVED]]} -; CHECK-V5T-NEXT: pop {r3} +; Only r1 to r3 are available to pop LR. +; r0 is used for the return value. +; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]} ; CHECK-V5T-NEXT: add sp, -; CHECK-V5T-NEXT: bx r3 +; CHECK-V5T-NEXT: bx [[POP_REG]] } declare void @llvm.va_start(i8*) nounwind diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll index 3c539c69017..851f793e5ee 100644 --- a/test/CodeGen/Thumb/pop.ll +++ b/test/CodeGen/Thumb/pop.ll @@ -3,9 +3,9 @@ define void @t(i8* %a, ...) nounwind { ; CHECK-LABEL: t: -; CHECK: pop {r3} +; CHECK: pop {[[POP_REG:r[0-3]]]} ; CHECK-NEXT: add sp, #12 -; CHECK-NEXT: bx r3 +; CHECK-NEXT: bx [[POP_REG]] entry: %a.addr = alloca i8, i32 4 call void @llvm.va_start(i8* %a.addr) diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll index 1c7b631741b..45b42125e16 100644 --- a/test/CodeGen/Thumb/vargs.ll +++ b/test/CodeGen/Thumb/vargs.ll @@ -32,12 +32,12 @@ bb7: ; preds = %bb call void @llvm.va_end( i8* %va.upgrd.4 ) ret void -; The return sequence should pop the lr to r3, recover the stack space used to +; The return sequence should pop the lr to r0-3, recover the stack space used to ; store variadic argument registers, then return via r3. Possibly there is a pop ; before this, but only if the function happened to use callee-saved registers. -; CHECK: pop {r3} +; CHECK: pop {[[POP_REG:r[0-3]]]} ; CHECK: add sp, #[[IMM]] -; CHECK: bx r3 +; CHECK: bx [[POP_REG]] } declare void @llvm.va_start(i8*)