diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 6feec9f216a..0f8da5f046b 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -188,13 +188,21 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo(); + return FuncInfo->hasSpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -223,8 +231,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -241,7 +250,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -250,7 +260,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -311,11 +324,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; // Get processor type. @@ -780,7 +789,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. @@ -822,30 +831,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (MFI->hasVarSizedObjects() || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } } void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *) const { + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -853,6 +847,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -1031,6 +1026,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. + + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } } bool diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 8d9d395dc59..53ee32601f2 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -32,7 +32,9 @@ public: Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -46,6 +48,7 @@ public: RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1b4d3618a04..7ae34423309 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -554,10 +554,11 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setHasSpills(); + + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) FuncInfo->setSpillsCR(); - } for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 045b375dd88..10b2160b3d4 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,6 +37,9 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; @@ -78,6 +81,7 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), SpillsCR(false), LRStoreRequired(false), MinReservedArea(0), @@ -109,6 +113,9 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll index 433fe5cc2aa..0da6e4351bd 100644 --- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll +++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll @@ -9,12 +9,12 @@ entry: ;CHECK: mfcr r0 ;CHECK: lis r2, 1 ;CHECK: rlwinm r0, r0, 8, 0, 31 -;CHECK: ori r2, r2, 34524 +;CHECK: ori r2, r2, 34540 ;CHECK: stwx r0, r1, r2 ; Make sure that the register scavenger returns the same temporary register. ;CHECK: lis r2, 1 ;CHECK: mfcr r0 -;CHECK: ori r2, r2, 34520 +;CHECK: ori r2, r2, 34536 ;CHECK: rlwinm r0, r0, 12, 0, 31 ;CHECK: stwx r0, r1, r2 %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] @@ -26,7 +26,7 @@ entry: return: ; preds = %entry ;CHECK: lis r2, 1 -;CHECK: ori r2, r2, 34524 +;CHECK: ori r2, r2, 34540 ;CHECK: lwzx r0, r1, r2 ;CHECK: rlwinm r0, r0, 24, 0, 31 ;CHECK: mtcrf 32, r0 diff --git a/test/CodeGen/PowerPC/frame-size.ll b/test/CodeGen/PowerPC/frame-size.ll new file mode 100644 index 00000000000..0e569a4602c --- /dev/null +++ b/test/CodeGen/PowerPC/frame-size.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" + +define i64 @foo() nounwind { +entry: + %x = alloca [32568 x i8] + %"alloca point" = bitcast i32 0 to i32 + %x1 = bitcast [32568 x i8]* %x to i8* + +; Check that the RS spill slot has been allocated (because the estimate +; will fail the small-frame-size check and the function has spills). +; CHECK: @foo +; CHECK: stdu 1, -32768(1) + + %s1 = call i64 @bar(i8* %x1) nounwind + %s2 = call i64 @bar(i8* %x1) nounwind + %s3 = call i64 @bar(i8* %x1) nounwind + %s4 = call i64 @bar(i8* %x1) nounwind + %s5 = call i64 @bar(i8* %x1) nounwind + %s6 = call i64 @bar(i8* %x1) nounwind + %s7 = call i64 @bar(i8* %x1) nounwind + %s8 = call i64 @bar(i8* %x1) nounwind + %r = call i64 @can(i64 %s1, i64 %s2, i64 %s3, i64 %s4, i64 %s5, i64 %s6, i64 %s7, i64 %s8) nounwind + br label %return + +return: + ret i64 %r +} + +declare i64 @bar(i8*) +declare i64 @can(i64, i64, i64, i64, i64, i64, i64, i64) +