diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 06eab7a4cea..ea25e71961b 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -302,10 +302,11 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, } // else Don't erase the return instruction. } -bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { +bool XCoreFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return true; @@ -337,10 +338,11 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const{ +bool XCoreFrameLowering:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const{ MachineFunction *MF = MBB.getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); @@ -420,11 +422,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void -XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void XCoreFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); const TargetRegisterClass *RC = &XCore::GRRegsRegClass; XCoreFunctionInfo *XFI = MF.getInfo(); @@ -434,7 +435,7 @@ 
XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, bool isVarArg = MF.getFunction()->isVarArg(); int FrameIdx; if (! isVarArg) { - // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. + // A fixed offset of 0 allows us to save/restore LR using entsp/retsp. FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true); } else { FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), @@ -443,17 +444,32 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, XFI->setUsesLR(FrameIdx); XFI->setLRSpillSlot(FrameIdx); } - if (RegInfo->requiresRegisterScavenging(MF)) { - // Reserve a slot close to SP or frame pointer. - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } - if (hasFP(MF)) { - // A callee save register is used to hold the FP. - // This needs saving / restoring in the epilogue / prologue. + + // A callee save register is used to hold the FP. + // This needs saving / restoring in the epilogue / prologue. + if (hasFP(MF)) XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); - } +} + +void XCoreFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { + assert(RS && "requiresRegisterScavenging failed"); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + XCoreFunctionInfo *XFI = MF.getInfo(); + // Reserve slots close to SP or frame pointer for Scavenging spills. + // When using SP for small frames, we don't need any scratch registers. + // When using SP for large frames, we may need 2 scratch registers. + // When using FP, for large or small frames, we may need 1 scratch register. 
+ if (XFI->isLargeFrame(MF) || hasFP(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + if (XFI->isLargeFrame(MF) && !hasFP(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); } diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index ebad62f2fa5..6cd90c96e7f 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -48,6 +48,9 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + //! Stack slot size (4 bytes) static int stackSlotSize() { return 4; diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index 7ca06729120..91b29760080 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -12,3 +12,19 @@ using namespace llvm; void XCoreFunctionInfo::anchor() { } + +bool XCoreFunctionInfo::isLargeFrame(const MachineFunction &MF) const { + if (CachedEStackSize == -1) { + CachedEStackSize = MF.getFrameInfo()->estimateStackSize(MF); + } + // isLargeFrame() is used when deciding if spill slots should be added to + // allow eliminateFrameIndex() to scavenge registers. + // This is only required when there is no FP and offsets are greater than + // ~256KB (~64Kwords). Thus only for code run on the emulator! + // + // The arbitrary value of 0xf000 allows frames of up to ~240KB before spill + // slots are added for the use of eliminateFrameIndex() register scavenging. + // For frames less than 240KB, it is assumed that there will be less than + // 16KB of function arguments. 
+ return CachedEStackSize > 0xf000; +} diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h index 69d5de3e03a..4fa4ee5d9c0 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -31,6 +31,7 @@ class XCoreFunctionInfo : public MachineFunctionInfo { int LRSpillSlot; int FPSpillSlot; int VarArgsFrameIndex; + mutable int CachedEStackSize; std::vector > SpillLabels; public: @@ -38,13 +39,15 @@ public: UsesLR(false), LRSpillSlot(0), FPSpillSlot(0), - VarArgsFrameIndex(0) {} + VarArgsFrameIndex(0), + CachedEStackSize(-1) {} explicit XCoreFunctionInfo(MachineFunction &MF) : UsesLR(false), LRSpillSlot(0), FPSpillSlot(0), - VarArgsFrameIndex(0) {} + VarArgsFrameIndex(0), + CachedEStackSize(-1) {} ~XCoreFunctionInfo() {} @@ -60,6 +63,8 @@ public: void setFPSpillSlot(int off) { FPSpillSlot = off; } int getFPSpillSlot() const { return FPSpillSlot; } + bool isLargeFrame(const MachineFunction &MF) const; + std::vector > &getSpillLabels() { return SpillLabels; } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 607e512b3ec..7c2d842e774 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -57,6 +57,165 @@ static inline bool isImmU16(unsigned val) { return val < (1 << 16); } +static void loadConstant(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned DstReg, int64_t Value) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + if (isMask_32(Value)) { + int N = Log2_32(Value) + 1; + BuildMI(MBB, II, dl, TII.get(XCore::MKMSK_rus), DstReg).addImm(N); + } else if (isImmU16(Value)) { + int Opcode = isImmU6(Value) ? 
XCore::LDC_ru6 : XCore::LDC_lru6; + BuildMI(MBB, II, dl, TII.get(Opcode), DstReg).addImm(Value); + } else { + MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool(); + const Constant *C = ConstantInt::get( + Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + BuildMI(MBB, II, dl, TII.get(XCore::LDWCP_lru6), DstReg) + .addConstantPoolIndex(Idx); + } +} + +static void InsertFPImmInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, unsigned FrameReg, int Offset ) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg) + .addReg(FrameReg) + .addImm(Offset); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addReg(FrameReg) + .addImm(Offset); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) + .addReg(FrameReg) + .addImm(Offset); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertFPConstInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, unsigned FrameReg, + int Offset, RegScavenger *RS ) { + assert(RS && "requiresRegisterScavenging failed"); + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchOffset); + loadConstant(II, TII, ScratchOffset, Offset); + + switch (MI.getOpcode()) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) + .addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + 
.addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) + .addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertSPImmInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, int Offset) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + bool isU6 = isImmU6(Offset); + switch (MI.getOpcode()) { + int NewOpcode; + case XCore::LDWFI: + NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) + .addImm(Offset); + break; + case XCore::STWFI: + NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addImm(Offset); + break; + case XCore::LDAWFI: + NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) + .addImm(Offset); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertSPConstInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, int Offset, RegScavenger *RS ) { + assert(RS && "requiresRegisterScavenging failed"); + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + unsigned OpCode = MI.getOpcode(); + + unsigned ScratchBase; + if (OpCode==XCore::STWFI) { + ScratchBase = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchBase); + } else + ScratchBase = Reg; + BuildMI(MBB, II, dl, TII.get(XCore::LDAWSP_ru6), ScratchBase).addImm(0); + unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchOffset); + loadConstant(II, TII, ScratchOffset, Offset); + + switch (OpCode) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, 
TII.get(XCore::LDW_3r), Reg) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { return MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); @@ -88,15 +247,12 @@ BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // TODO can we estimate stack size? 
- return TFI->hasFP(MF); + return true; } bool XCoreRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); + return true; } bool @@ -110,7 +266,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; - DebugLoc dl = MI.getDebugLoc(); MachineOperand &FrameOp = MI.getOperand(FIOperandNum); int FrameIndex = FrameOp.getIndex(); @@ -146,124 +301,28 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); assert(Offset%4 == 0 && "Misaligned stack offset"); - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - Offset/=4; - bool FP = TFI->hasFP(MF); - unsigned Reg = MI.getOperand(0).getReg(); - bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill(); - assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand"); - - MachineBasicBlock &MBB = *MI.getParent(); - - if (FP) { - bool isUs = isImmUs(Offset); - - if (!isUs) { - if (!RS) - report_fatal_error("eliminateFrameIndex Frame size too big: " + - Twine(Offset)); - unsigned ScratchReg = RS->scavengeRegister(&XCore::GRRegsRegClass, II, - SPAdj); - loadConstant(MBB, II, ScratchReg, Offset, dl); - switch (MI.getOpcode()) { - case XCore::LDWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) - .addReg(Reg, getKillRegState(isKill)) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - case XCore::LDAWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } - } else { - switch (MI.getOpcode()) { - case XCore::LDWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), 
Reg) - .addReg(FrameReg) - .addImm(Offset); - break; - case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) - .addReg(Reg, getKillRegState(isKill)) - .addReg(FrameReg) - .addImm(Offset); - break; - case XCore::LDAWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) - .addReg(FrameReg) - .addImm(Offset); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } - } - } else { - bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) - report_fatal_error("eliminateFrameIndex Frame size too big: " + - Twine(Offset)); - switch (MI.getOpcode()) { - int NewOpcode; - case XCore::LDWFI: - NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) - .addImm(Offset); - break; - case XCore::STWFI: - NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode)) - .addReg(Reg, getKillRegState(isKill)) - .addImm(Offset); - break; - case XCore::LDAWFI: - NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) - .addImm(Offset); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } + if (TFI->hasFP(MF)) { + if (isImmUs(Offset)) + InsertFPImmInst(II, TII, Reg, FrameReg, Offset); + else + InsertFPConstInst(II, TII, Reg, FrameReg, Offset, RS); + } else { + if (isImmU16(Offset)) + InsertSPImmInst(II, TII, Reg, Offset); + else + InsertSPConstInst(II, TII, Reg, Offset, RS); } // Erase old instruction. + MachineBasicBlock &MBB = *MI.getParent(); MBB.erase(II); } -void XCoreRegisterInfo:: -loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DstReg, int64_t Value, DebugLoc dl) const { - const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo(); - if (isMask_32(Value)) { - int N = Log2_32(Value) + 1; - BuildMI(MBB, I, dl, TII.get(XCore::MKMSK_rus), DstReg).addImm(N); - } else if (isImmU16(Value)) { - int Opcode = isImmU6(Value) ? 
XCore::LDC_ru6 : XCore::LDC_lru6; - BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); - return; - } else { - MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool(); - const Constant *C = ConstantInt::get( - Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value); - unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); - BuildMI(MBB, I, dl, TII.get(XCore::LDWCP_lru6), DstReg) - .addConstantPoolIndex(Idx); - } -} unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 2370c6280f2..36ba7b46e5e 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -24,19 +24,6 @@ namespace llvm { class TargetInstrInfo; struct XCoreRegisterInfo : public XCoreGenRegisterInfo { -private: - void loadConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, int64_t Value, DebugLoc dl) const; - - void storeToStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned SrcReg, int Offset, DebugLoc dl) const; - - void loadFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, int Offset, DebugLoc dl) const; - public: XCoreRegisterInfo(); diff --git a/test/CodeGen/XCore/epilogue_prologue.ll b/test/CodeGen/XCore/epilogue_prologue.ll index ffbe7a1571f..2898ae5dc82 100644 --- a/test/CodeGen/XCore/epilogue_prologue.ll +++ b/test/CodeGen/XCore/epilogue_prologue.ll @@ -1,7 +1,11 @@ ; RUN: llc < %s -march=xcore | FileCheck %s ; RUN: llc < %s -march=xcore -disable-fp-elim | FileCheck %s -check-prefix=CHECKFP +; When using SP for small frames, we don't need any scratch registers (SR). +; When using SP for large frames, we may need two scratch registers. +; When using FP, for large or small frames, we may need one scratch register. 
+; FP + small frame: spill FP+SR = entsp 2 ; CHECKFP-LABEL: f1 ; CHECKFP: entsp 2 ; CHECKFP-NEXT: stw r10, sp[1] @@ -10,6 +14,7 @@ ; CHECKFP-NEXT: ldw r10, sp[1] ; CHECKFP-NEXT: retsp 2 ; +; !FP + small frame: no spills = no stack adjustment needed ; CHECK-LABEL: f1 ; CHECK: stw lr, sp[0] ; CHECK: ldw lr, sp[0] @@ -21,6 +26,7 @@ entry: } +; FP + small frame: spill FP+SR+R0+LR = entsp 3 + extsp 1 ; CHECKFP-LABEL:f3 ; CHECKFP: entsp 3 ; CHECKFP-NEXT: stw r10, sp[1] @@ -36,14 +42,15 @@ entry: ; CHECKFP-NEXT: ldw r10, sp[1] ; CHECKFP-NEXT: retsp 3 ; +; !FP + small frame: spill R0+LR = entsp 2 ; CHECK-LABEL: f3 ; CHECK: entsp 2 -; CHECK: stw [[REG:r[4-9]+]], sp[1] -; CHECK: mov [[REG]], r0 -; CHECK: bl f2 -; CHECK: mov r0, [[REG]] -; CHECK: ldw [[REG]], sp[1] -; CHECK: retsp 2 +; CHECK-NEXT: stw [[REG:r[4-9]+]], sp[1] +; CHECK-NEXT: mov [[REG]], r0 +; CHECK-NEXT: bl f2 +; CHECK-NEXT: mov r0, [[REG]] +; CHECK-NEXT: ldw [[REG]], sp[1] +; CHECK-NEXT: retsp 2 declare void @f2() define i32 @f3(i32 %i) nounwind { entry: @@ -52,6 +59,7 @@ entry: } +; FP + large frame: spill FP+SR = entsp 2 + 100000 ; CHECKFP-LABEL: f4 ; CHECKFP: extsp 65535 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} @@ -71,23 +79,32 @@ entry: ; CHECKFP-NEXT: ldaw sp, sp[34467] ; CHECKFP-NEXT: retsp 0 ; +; !FP + large frame: spill SR+SR = entsp 2 + 100000 ; CHECK-LABEL: f4 ; CHECK: extsp 65535 ; CHECK-NEXT: .Ltmp{{[0-9]+}} ; CHECK-NEXT: .cfi_def_cfa_offset 262140 -; CHECK-NEXT: extsp 34465 +; CHECK-NEXT: extsp 34467 ; CHECK-NEXT: .Ltmp{{[0-9]+}} -; CHECK-NEXT: .cfi_def_cfa_offset 400000 +; CHECK-NEXT: .cfi_def_cfa_offset 400008 ; CHECK-NEXT: ldaw sp, sp[65535] -; CHECK-NEXT: ldaw sp, sp[34465] +; CHECK-NEXT: ldaw sp, sp[34467] ; CHECK-NEXT: retsp 0 define void @f4() { entry: - %0 = alloca [100000 x i32], align 4 + %0 = alloca [100000 x i32] ret void } +; FP + large frame: spill FP+SR+R4+LR = entsp 3 + 200000 + extsp 1 +; CHECKFP: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECKFP-NEXT: .align 4 +; CHECKFP-NEXT: 
.LCPI[[CNST0:[0-9_]+]]: +; CHECKFP-NEXT: .long 200002 +; CHECKFP-NEXT: .LCPI[[CNST1:[0-9_]+]]: +; CHECKFP-NEXT: .long 200001 +; CHECKFP-NEXT: .text ; CHECKFP-LABEL: f6 ; CHECKFP: entsp 65535 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} @@ -100,26 +117,47 @@ entry: ; CHECKFP-NEXT: extsp 65535 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} ; CHECKFP-NEXT: .cfi_def_cfa_offset 786420 -; CHECKFP-NEXT: extsp 3396 +; CHECKFP-NEXT: extsp 3398 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} -; CHECKFP-NEXT: .cfi_def_cfa_offset 800004 +; CHECKFP-NEXT: .cfi_def_cfa_offset 800012 ; CHECKFP-NEXT: stw r10, sp[1] ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} -; CHECKFP-NEXT: .cfi_offset 10, -800000 +; CHECKFP-NEXT: .cfi_offset 10, -800008 ; CHECKFP-NEXT: ldaw r10, sp[0] ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} ; CHECKFP-NEXT: .cfi_def_cfa_register 10 +; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST0]]] +; CHECKFP-NEXT: stw [[REG:r[4-9]+]], r10[r1] +; CHECKFP-NEXT: .Ltmp{{[0-9]+}} +; CHECKFP-NEXT: .cfi_offset 4, -4 +; CHECKFP-NEXT: mov [[REG]], r0 ; CHECKFP-NEXT: extsp 1 ; CHECKFP-NEXT: ldaw r0, r10[2] ; CHECKFP-NEXT: bl f5 ; CHECKFP-NEXT: ldaw sp, sp[1] +; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST1]]] +; CHECKFP-NEXT: ldaw r0, r10[r1] +; CHECKFP-NEXT: extsp 1 +; CHECKFP-NEXT: bl f5 +; CHECKFP-NEXT: ldaw sp, sp[1] +; CHECKFP-NEXT: mov r0, [[REG]] +; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST0]]] +; CHECKFP-NEXT: ldw [[REG]], r10[r1] ; CHECKFP-NEXT: set sp, r10 ; CHECKFP-NEXT: ldw r10, sp[1] ; CHECKFP-NEXT: ldaw sp, sp[65535] ; CHECKFP-NEXT: ldaw sp, sp[65535] ; CHECKFP-NEXT: ldaw sp, sp[65535] -; CHECKFP-NEXT: retsp 3396 +; CHECKFP-NEXT: retsp 3398 ; +; !FP + large frame: spill SR+SR+R4+LR = entsp 4 + 200000 +; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECK-NEXT: .align 4 +; CHECK-NEXT: .LCPI[[CNST0:[0-9_]+]]: +; CHECK-NEXT: .long 200003 +; CHECK-NEXT: .LCPI[[CNST1:[0-9_]+]]: +; CHECK-NEXT: .long 200002 +; CHECK-NEXT: .text ; CHECK-LABEL: f6 ; CHECK: entsp 65535 ; CHECK-NEXT: .Ltmp{{[0-9]+}} @@ -132,20 +170,65 @@ entry: ; CHECK-NEXT: extsp 65535 ; CHECK-NEXT: 
.Ltmp{{[0-9]+}} ; CHECK-NEXT: .cfi_def_cfa_offset 786420 -; CHECK-NEXT: extsp 3395 +; CHECK-NEXT: extsp 3399 ; CHECK-NEXT: .Ltmp{{[0-9]+}} -; CHECK-NEXT: .cfi_def_cfa_offset 800000 -; CHECK-NEXT: ldaw r0, sp[1] +; CHECK-NEXT: .cfi_def_cfa_offset 800016 +; CHECK-NEXT: ldaw r1, sp[0] +; CHECK-NEXT: ldw r2, cp[.LCPI[[CNST0]]] +; CHECK-NEXT: stw [[REG:r[4-9]+]], r1[r2] +; CHECK-NEXT: .Ltmp{{[0-9]+}} +; CHECK-NEXT: .cfi_offset 4, -4 +; CHECK-NEXT: mov [[REG]], r0 +; CHECK-NEXT: ldaw r0, sp[3] ; CHECK-NEXT: bl f5 +; CHECK-NEXT: ldaw r0, sp[0] +; CHECK-NEXT: ldw r1, cp[.LCPI[[CNST1]]] +; CHECK-NEXT: ldaw r0, r0[r1] +; CHECK-NEXT: bl f5 +; CHECK-NEXT: mov r0, [[REG]] +; CHECK-NEXT: ldaw [[REG]], sp[0] +; CHECK-NEXT: ldw r1, cp[.LCPI[[CNST0]]] +; CHECK-NEXT: ldw [[REG]], [[REG]][r1] ; CHECK-NEXT: ldaw sp, sp[65535] ; CHECK-NEXT: ldaw sp, sp[65535] ; CHECK-NEXT: ldaw sp, sp[65535] -; CHECK-NEXT: retsp 3395 +; CHECK-NEXT: retsp 3399 declare void @f5(i32*) -define void @f6() { +define i32 @f6(i32 %i) { entry: - %0 = alloca [199999 x i32], align 4 - %1 = getelementptr inbounds [199999 x i32]* %0, i32 0, i32 0 + %0 = alloca [200000 x i32] + %1 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 0 + call void @f5(i32* %1) + %2 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 199999 + call void @f5(i32* %2) + ret i32 %i +} + + +; FP + large frame: spill FP+SR+LR = entsp 2 + 32768 + extsp 1 +; CHECKFP-LABEL:f8 +; CHECKFP: entsp 32770 +; CHECKFP-NEXT: stw r10, sp[1] +; CHECKFP-NEXT: ldaw r10, sp[0] +; CHECKFP-NEXT: mkmsk r1, 15 +; CHECKFP-NEXT: ldaw r0, r10[r1] +; CHECKFP-NEXT: extsp 1 +; CHECKFP-NEXT: bl f5 +; CHECKFP-NEXT: ldaw sp, sp[1] +; CHECKFP-NEXT: set sp, r10 +; CHECKFP-NEXT: ldw r10, sp[1] +; CHECKFP-NEXT: retsp 32770 +; +; !FP + large frame: spill SR+SR+LR = entsp 3 + 32768 +; CHECK-LABEL:f8 +; CHECK: entsp 32771 +; CHECK-NEXT: ldaw r0, sp[32768] +; CHECK-NEXT: bl f5 +; CHECK-NEXT: retsp 32771 +define void @f8() nounwind { +entry: + %0 = alloca [32768 x i32] + 
%1 = getelementptr inbounds [32768 x i32]* %0, i32 0, i32 32765 call void @f5(i32* %1) ret void } diff --git a/test/CodeGen/XCore/epilogue_prologue_fp.ll b/test/CodeGen/XCore/epilogue_prologue_fp.ll deleted file mode 100644 index 9b9837c90d4..00000000000 --- a/test/CodeGen/XCore/epilogue_prologue_fp.ll +++ /dev/null @@ -1,42 +0,0 @@ -; Functions with frames > 256K bytes require a frame pointer to access the stack. -; At present, functions must be compiled using '-fno-omit-frame-pointer'. -; RUN: llc < %s -march=xcore -disable-fp-elim | FileCheck %s - -declare void @f0(i32*) - -; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 -; CHECK: .LCPI[[NUM:[0-9_]+]]: -; CHECK: .long 99999 -; CHECK: .text -; CHECK-LABEL:f1 -; CHECK: entsp 65535 -; CHECK-NEXT: extsp 34465 -; CHECK-NEXT: stw r10, sp[1] -; CHECK-NEXT: ldaw r10, sp[0] -; CHECK-NEXT: ldw r1, cp[.LCPI[[NUM]]] -; CHECK-NEXT: ldaw r0, r10[r1] -; CHECK-NEXT: extsp 1 -; CHECK-NEXT: bl f0 -; CHECK-NEXT: ldaw sp, sp[1] -; CHECK-NEXT: set sp, r10 -; CHECK-NEXT: ldw r10, sp[1] -; CHECK-NEXT: ldaw sp, sp[65535] -; CHECK-NEXT: retsp 34465 -define void @f1() nounwind { -entry: - %0 = alloca [99998 x i32] - %1 = getelementptr inbounds [99998 x i32]* %0, i32 0, i32 99997 - call void @f0(i32* %1) - ret void -} - -; CHECK-LABEL:f2 -; CHECK: mkmsk [[REG:r[0-9]+]], 15 -; CHECK-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}} -define void @f2() nounwind { -entry: - %0 = alloca [32768 x i32] - %1 = getelementptr inbounds [32768 x i32]* %0, i32 0, i32 32765 - call void @f0(i32* %1) - ret void -} diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll index 5b612d0f9b5..f96ecd3fc21 100644 --- a/test/CodeGen/XCore/scavenging.ll +++ b/test/CodeGen/XCore/scavenging.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=xcore +; RUN: llc < %s -march=xcore | FileCheck %s + @size = global i32 0 ; [#uses=1] @g0 = external global i32 ; [#uses=2] @g1 = external global i32 ; [#uses=2] @@ -48,5 +49,70 @@ entry: call void @g(i32* %x1, i32* %1) 
nounwind ret void } - declare void @g(i32*, i32*) + + +; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECK: .align 4 +; CHECK: [[ARG5:.LCPI[0-9_]+]]: +; CHECK: .long 100003 +; CHECK: [[INDEX0:.LCPI[0-9_]+]]: +; CHECK: .long 80002 +; CHECK: [[INDEX1:.LCPI[0-9_]+]]: +; CHECK: .long 81002 +; CHECK: [[INDEX2:.LCPI[0-9_]+]]: +; CHECK: .long 82002 +; CHECK: [[INDEX3:.LCPI[0-9_]+]]: +; CHECK: .long 83002 +; CHECK: [[INDEX4:.LCPI[0-9_]+]]: +; CHECK: .long 84002 +; CHECK: .text +; !FP + large frame: spill SR+SR = entsp 2 + 100000 +; CHECK-LABEL: ScavengeSlots: +; CHECK: extsp 65535 +; CHECK: extsp 34467 +; scavenge r11 +; CHECK: ldaw r11, sp[0] +; scavenge r4 using SR spill slot +; CHECK: stw r4, sp[1] +; CHECK: ldw r4, cp{{\[}}[[ARG5]]{{\]}} +; r11 used to load 5th argument +; CHECK: ldw r11, r11[r4] +; CHECK: ldaw r4, sp[0] +; scavenge r5 using SR spill slot +; CHECK: stw r5, sp[0] +; CHECK: ldw r5, cp{{\[}}[[INDEX0]]{{\]}} +; r4 & r5 used by InsertSPConstInst() to emit STW_l3r instruction. 
+; CHECK: stw r0, r4[r5] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r5, cp{{\[}}[[INDEX1]]{{\]}} +; CHECK: stw r1, r0[r5] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r1, cp{{\[}}[[INDEX2]]{{\]}} +; CHECK: stw r2, r0[r1] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r1, cp{{\[}}[[INDEX3]]{{\]}} +; CHECK: stw r3, r0[r1] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r1, cp{{\[}}[[INDEX4]]{{\]}} +; CHECK: stw r11, r0[r1] +; CHECK: ldaw sp, sp[65535] +; CHECK: ldaw sp, sp[34467] +; CHECK: ldw r4, sp[1] +; CHECK: ldw r5, sp[0] +; CHECK: retsp 0 +define void @ScavengeSlots(i32 %r0, i32 %r1, i32 %r2, i32 %r3, i32 %r4) nounwind { +entry: + %Data = alloca [100000 x i32] + %i0 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 80000 + store volatile i32 %r0, i32* %i0 + %i1 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 81000 + store volatile i32 %r1, i32* %i1 + %i2 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 82000 + store volatile i32 %r2, i32* %i2 + %i3 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 83000 + store volatile i32 %r3, i32* %i3 + %i4 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 84000 + store volatile i32 %r4, i32* %i4 + ret void +}