diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index c24afa908d6..97d3600b1b8 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -13,6 +13,7 @@ #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" +#include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -168,6 +169,11 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MI->eraseFromParent(); } +static bool spillsCR(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo(); + return FuncInfo->isCRSpilled(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { @@ -184,13 +190,21 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). + // to adjust the stack pointer (we fit in the Red Zone). For 64-bit + // SVR4, we also require a stack frame if we need to spill the CR, + // since this spill area is addressed relative to the stack pointer. bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can + // still generate stackless code if all local vars are reg-allocated. + // Try: (FrameSize <= 224 + // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI())) if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. + !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave. + Subtarget.isSVR4ABI() + && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame MFI->setStackSize(0); @@ -488,7 +502,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Add callee saved registers to move list. const std::vector &CSI = MFI->getCalleeSavedInfo(); for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); unsigned Reg = CSI[I].getReg(); if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; @@ -497,6 +510,25 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (PPC::CRBITRCRegClass.contains(Reg)) continue; + // For SVR4, don't emit a move for the CR spill slot if we haven't + // spilled CRs. + if (Subtarget.isSVR4ABI() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4) + && !spillsCR(MF)) + continue; + + // For 64-bit SVR4 when we have spilled CRs, the spill location + // is SP+8, not a frame-relative slot. + if (Subtarget.isSVR4ABI() + && Subtarget.isPPC64() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + MachineLocation CSDst(PPC::X1, 8); + MachineLocation CSSrc(PPC::CR2); + Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + continue; + } + + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); Moves.push_back(MachineMove(Label, CSDst, CSSrc)); @@ -714,11 +746,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -static bool spillsCR(const MachineFunction &MF) { - const PPCFunctionInfo *FuncInfo = MF.getInfo(); - return FuncInfo->isCRSpilled(); -} - /// MustSaveLR - Return true if this function requires that we save the LR /// register onto the stack in the prolog and restore it in the epilog of the /// function. @@ -808,7 +835,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) bool HasGPSaveArea = false; bool HasG8SaveArea = false; bool HasFPSaveArea = false; - bool HasCRSaveArea = false; bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; @@ -843,10 +869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinFPR) { MinFPR = Reg; } -// FIXME SVR4: Disable CR save area for now. } else if (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)) { -// HasCRSaveArea = true; + ; // do nothing, as we already know whether CRs are spilled } else if (PPC::VRSAVERCRegClass.contains(Reg)) { HasVRSAVESaveArea = true; } else if (PPC::VRRCRegClass.contains(Reg)) { @@ -926,16 +951,21 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } - // The CR save area is below the general register save area. - if (HasCRSaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the CR/CRBIT register class? + // For 32-bit only, the CR save area is below the general register + // save area. For 64-bit SVR4, the CR save area is addressed relative + // to the stack pointer and hence does not need an adjustment here. + // Only CR2 (the first nonvolatile spilled) has an associated frame + // index so that we have a single uniform save area. + if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::CRBITRCRegClass.contains(Reg) || - PPC::CRRCRegClass.contains(Reg)) { + if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) + // Leave Darwin logic as-is. + || (!Subtarget.isSVR4ABI() && + (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)))) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -973,3 +1003,184 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } } + +bool +PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast(MF->getTarget().getInstrInfo()); + DebugLoc DL; + bool CRSpilled = false; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + // CR2 through CR4 are the nonvolatile CR fields. + bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; + + if (CRSpilled && IsCRField) + continue; + + // Add the callee-saved register as live-in; it's killed at the spill. + MBB.addLiveIn(Reg); + + // Insert the spill to the stack frame. + if (IsCRField) { + CRSpilled = true; + // The first time we see a CR field, store the whole CR into the + // save slot via GPR12 (available in the prolog for 32- and 64-bit). + if (Subtarget.isPPC64()) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12)); + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::X12, + getKillRegState(true)) + .addImm(8) + .addReg(PPC::X1)); + } else { + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have + // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)); + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::R12, + getKillRegState(true)), + CSI[i].getFrameIdx())); + } + + // Record that we spill the CR in this function. + PPCFunctionInfo *FuncInfo = MF->getInfo(); + FuncInfo->setSpillsCR(); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); + } + } + return true; +} + +static void +restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector &CSI, unsigned CSIIndex) { + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast(MF->getTarget().getInstrInfo()); + DebugLoc DL; + unsigned RestoreOp, MoveReg; + + if (isPPC64) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12) + .addImm(8) + .addReg(PPC::X1)); + RestoreOp = PPC::MTCRF8; + MoveReg = PPC::X12; + } else { + // 32-bit: FP-relative + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), + PPC::R12), + CSI[CSIIndex].getFrameIdx())); + RestoreOp = PPC::MTCRF; + MoveReg = PPC::R12; + } + + if (CR2Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) + .addReg(MoveReg)); + + if (CR3Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) + .addReg(MoveReg)); + + if (CR4Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) + .addReg(MoveReg)); +} + +bool +PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast(MF->getTarget().getInstrInfo()); + bool CR2Spilled = false; + bool CR3Spilled = false; + bool CR4Spilled = false; + unsigned CSIIndex = 0; + + // Initialize insertion-point logic; we will be restoring in reverse + // order of spill. + MachineBasicBlock::iterator I = MI, BeforeI = I; + bool AtStart = I == MBB.begin(); + + if (!AtStart) + --BeforeI; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + + if (Reg == PPC::CR2) { + CR2Spilled = true; + // The spill slot is associated only with CR2, which is the + // first nonvolatile spilled. Save it here. + CSIIndex = i; + continue; + } else if (Reg == PPC::CR3) { + CR3Spilled = true; + continue; + } else if (Reg == PPC::CR4) { + CR4Spilled = true; + continue; + } else { + // When we first encounter a non-CR register after seeing at + // least one CR register, restore all spilled CRs together. + if ((CR2Spilled || CR3Spilled || CR4Spilled) + && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + CR2Spilled = CR3Spilled = CR4Spilled = false; + } + + // Default behavior for non-CR saves. + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); + } + + // Insert in reverse order. + if (AtStart) + I = MBB.begin(); + else { + I = BeforeI; + ++I; + } + } + + // If we haven't yet spilled the CRs, do so now. + if (CR2Spilled || CR3Spilled || CR4Spilled) + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + + return true; +} + diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d708541c668..4d957b91c7b 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -45,6 +45,16 @@ public: RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const; + /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). @@ -170,23 +180,11 @@ public: {PPC::R15, -68}, {PPC::R14, -72}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, + // CR save area offset. We map each of the nonvolatile CR fields + // to the slot for CR2, which is the first of the nonvolatile CR + // fields to be assigned, so that we only allocate one save slot. + // See PPCRegisterInfo::hasReservedSpillSlot() for more information. + {PPC::CR2, -4}, // VRSAVE save area offset. {PPC::VRSAVE, -4}, @@ -228,27 +226,6 @@ public: {PPC::F14, -144}, // General register save area offsets. - // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit - // mode? - {PPC::R31, -4}, - {PPC::R30, -12}, - {PPC::R29, -20}, - {PPC::R28, -28}, - {PPC::R27, -36}, - {PPC::R26, -44}, - {PPC::R25, -52}, - {PPC::R24, -60}, - {PPC::R23, -68}, - {PPC::R22, -76}, - {PPC::R21, -84}, - {PPC::R20, -92}, - {PPC::R19, -100}, - {PPC::R18, -108}, - {PPC::R17, -116}, - {PPC::R16, -124}, - {PPC::R15, -132}, - {PPC::R14, -140}, - {PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, @@ -268,24 +245,6 @@ public: {PPC::X15, -136}, {PPC::X14, -144}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, - // VRSAVE save area offset. {PPC::VRSAVE, -4}, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index ab8bf1f93a3..4d568c22f8e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 0 : 1), - Subtarget(ST), TII(tii) { + Subtarget(ST), TII(tii), CRSpillFrameIdx(0) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -111,6 +111,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : CSR_Darwin32_SaveList; + // For 32-bit SVR4, also initialize the frame index associated with + // the CR spill slot. + if (!Subtarget.isPPC64()) + CRSpillFrameIdx = 0; + return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; } @@ -477,6 +482,31 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +bool +PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { + + // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 + // ABI, return true to prevent allocating an additional frame slot. + // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 + // is arbitrary and will be subsequently ignored. For 32-bit, we must + // create exactly one stack slot and return its FrameIdx for all + // nonvolatiles. + if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { + if (Subtarget.isPPC64()) { + FrameIdx = 0; + } else if (CRSpillFrameIdx) { + FrameIdx = CRSpillFrameIdx; + } else { + MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo(); + FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); + CRSpillFrameIdx = FrameIdx; + } + return true; + } + return false; +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 152c36d699e..98efa6d48dc 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -30,6 +30,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { std::map ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; + mutable int CRSpillFrameIdx; public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); @@ -65,6 +66,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, + int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll new file mode 100644 index 00000000000..3e98dbd254d --- /dev/null +++ b/test/CodeGen/PowerPC/crsave.ll @@ -0,0 +1,49 @@ +; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32 +; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64 + +declare void @foo() + +define i32 @test_cr2() nounwind { +entry: + %ret = alloca i32, align 4 + %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind + store i32 %0, i32* %ret, align 4 + call void @foo() + %1 = load i32* %ret, align 4 + ret i32 %1 +} + +; PPC32: mfcr 12 +; PPC32-NEXT: stw 12, {{[0-9]+}}(31) +; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: mtcrf 32, 12 + +; PPC64: mfcr 12 +; PPC64-NEXT: stw 12, 8(1) +; PPC64: lwz 12, 8(1) +; PPC64-NEXT: mtcrf 32, 12 + +define i32 @test_cr234() nounwind { +entry: + %ret = alloca i32, align 4 + %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09cmp 3,$2,$2\0A\09cmp 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind + store i32 %0, i32* %ret, align 4 + call void @foo() + %1 = load i32* %ret, align 4 + ret i32 %1 +} + +; PPC32: mfcr 12 +; PPC32-NEXT: stw 12, {{[0-9]+}}(31) +; PPC32: lwz 12, {{[0-9]+}}(31) +; PPC32-NEXT: mtcrf 32, 12 +; PPC32-NEXT: mtcrf 16, 12 +; PPC32-NEXT: mtcrf 8, 12 + +; PPC64: mfcr 12 +; PPC64-NEXT: stw 12, 8(1) +; PPC64: lwz 12, 8(1) +; PPC64-NEXT: mtcrf 32, 12 +; PPC64-NEXT: mtcrf 16, 12 +; PPC64-NEXT: mtcrf 8, 12 +