diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 09bbfc6a45a..02881433b69 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -196,8 +196,65 @@ void PPCDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { CodeGenMap.clear(); DAG.RemoveDeadNodes(); - // Emit machine code to BB. + // Emit machine code to BB. ScheduleAndEmitDAG(DAG); + + // Check to see if this function uses vector registers, which means we have to + // save and restore the VRSAVE register and update it with the regs we use. + // + // In this case, there will be virtual registers of vector type created + // by the scheduler. Detect them now (the last vreg number is inclusive). + SSARegMap *RegMap = DAG.getMachineFunction().getSSARegMap(); + bool HasVectorVReg = false; + for (unsigned i = MRegisterInfo::FirstVirtualRegister, + e = RegMap->getLastVirtReg()+1; i != e; ++i) + if (RegMap->getRegClass(i) == &PPC::VRRCRegClass) { + HasVectorVReg = true; + break; + } + + // If we have a vector register, we want to emit code into the entry and exit + // blocks to save and restore the VRSAVE register. We do this here (instead + // of marking all vector instructions as clobbering VRSAVE) for two reasons: + // + // 1. This (trivially) reduces the load on the register allocator, by not + // having to represent the live range of the VRSAVE register. + // 2. This (more significantly) allows us to create a temporary virtual + // register to hold the saved VRSAVE value, allowing this temporary to be + // register allocated, instead of forcing it to be spilled to the stack. + if (HasVectorVReg) { + // Create two vregs - one to hold the VRSAVE register that is live-in to the + // function and one for the value after having bits or'd into it. 
+ unsigned InVRSAVE = RegMap->createVirtualRegister(&PPC::GPRCRegClass); + unsigned UpdatedVRSAVE = RegMap->createVirtualRegister(&PPC::GPRCRegClass); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock &EntryBB = *MF.begin(); + // Emit the following code into the entry block: + // InVRSAVE = MFVRSAVE + // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE + // MTVRSAVE UpdatedVRSAVE + MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point + BuildMI(EntryBB, IP, PPC::MFVRSAVE, 0, InVRSAVE); + BuildMI(EntryBB, IP, PPC::UPDATE_VRSAVE, 1, UpdatedVRSAVE).addReg(InVRSAVE); + BuildMI(EntryBB, IP, PPC::MTVRSAVE, 1).addReg(UpdatedVRSAVE); + + // Find all return blocks, outputting a restore in each epilog. + const TargetInstrInfo &TII = *DAG.getTarget().getInstrInfo(); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + if (!BB->empty() && TII.isReturn(BB->back().getOpcode())) { + IP = BB->end(); --IP; + + // Skip over all terminator instructions, which are part of the return + // sequence. 
+ MachineBasicBlock::iterator I2 = IP; + while (I2 != BB->begin() && TII.isTerminatorInstr((--I2)->getOpcode())) + IP = I2; + + // Emit: MTVRSAVE InVRSAVE + BuildMI(*BB, IP, PPC::MTVRSAVE, 1).addReg(InVRSAVE); + } + } } /// getGlobalBaseReg - Output the instructions required to put the diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index b41a77a10c5..1aa43bad581 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -210,6 +210,9 @@ def ADJCALLSTACKDOWN : Pseudo<(ops u16imm:$amt), def ADJCALLSTACKUP : Pseudo<(ops u16imm:$amt), "; ADJCALLSTACKUP", [(callseq_end imm:$amt)]>; + +def UPDATE_VRSAVE : Pseudo<(ops GPRC:$rD, GPRC:$rS), + "UPDATE_VRSAVE $rD, $rS", []>; } def IMPLICIT_DEF_GPR : Pseudo<(ops GPRC:$rD), "; $rD = IMPLICIT_DEF_GPRC", [(set GPRC:$rD, (undef))]>; @@ -694,8 +697,24 @@ def MCRF : XLForm_3<19, 0, (ops CRRC:$BF, CRRC:$BFA), // def MFCTR : XFXForm_1_ext<31, 339, 9, (ops GPRC:$rT), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; +def MTCTR : XFXForm_7_ext<31, 467, 9, (ops GPRC:$rS), "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; + +def MTLR : XFXForm_7_ext<31, 467, 8, (ops GPRC:$rS), "mtlr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; def MFLR : XFXForm_1_ext<31, 339, 8, (ops GPRC:$rT), "mflr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; + +// Move to/from VRSAVE: despite being an SPR, the VRSAVE register is renamed like +// a GPR on the PPC970. As such, copies in and out have the same performance +// characteristics as an OR instruction. 
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (ops GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_Unit_FXU; +def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (ops GPRC:$rT), + "mfspr $rT, 256", IntGeneral>, + PPC970_Unit_FXU; + def MFCR : XFXForm_3<31, 19, (ops GPRC:$rT), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MTCRF : XFXForm_5<31, 144, (ops crbitm:$FXM, GPRC:$rS), @@ -704,13 +723,6 @@ def MTCRF : XFXForm_5<31, 144, (ops crbitm:$FXM, GPRC:$rS), def MFOCRF: XFXForm_5a<31, 19, (ops GPRC:$rT, crbitm:$FXM), "mfcr $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -def MTCTR : XFXForm_7_ext<31, 467, 9, (ops GPRC:$rS), "mtctr $rS", SprMTSPR>, - PPC970_DGroup_First, PPC970_Unit_FXU; -def MTLR : XFXForm_7_ext<31, 467, 8, (ops GPRC:$rS), "mtlr $rS", SprMTSPR>, - PPC970_DGroup_First, PPC970_Unit_FXU; -def MTSPR : XFXForm_7<31, 467, (ops GPRC:$rS, u16imm:$UIMM), "mtspr $UIMM, $rS", - SprMTSPR>, - PPC970_DGroup_Single, PPC970_Unit_FXU; // XS-Form instructions. Just 'sradi' // diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 8d2037cc6b5..d55bdc7627e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -266,12 +266,63 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const { } } +// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the +// instruction selector. Based on the vector registers that have been used, +// transform this into the appropriate OR/ORI/ORIS instruction(s). 
+static void HandleVRSaveUpdate(MachineInstr *MI, const bool *UsedRegs) { + unsigned UsedRegMask = 0; // Bit for V<N> is 1 << (31-N), i.e. V0 is the MSB. +#define HANDLEREG(N) if (UsedRegs[PPC::V##N]) UsedRegMask |= 1U << (31-N) + HANDLEREG( 0); HANDLEREG( 1); HANDLEREG( 2); HANDLEREG( 3); + HANDLEREG( 4); HANDLEREG( 5); HANDLEREG( 6); HANDLEREG( 7); + HANDLEREG( 8); HANDLEREG( 9); HANDLEREG(10); HANDLEREG(11); + HANDLEREG(12); HANDLEREG(13); HANDLEREG(14); HANDLEREG(15); + HANDLEREG(16); HANDLEREG(17); HANDLEREG(18); HANDLEREG(19); + HANDLEREG(20); HANDLEREG(21); HANDLEREG(22); HANDLEREG(23); + HANDLEREG(24); HANDLEREG(25); HANDLEREG(26); HANDLEREG(27); + HANDLEREG(28); HANDLEREG(29); HANDLEREG(30); HANDLEREG(31); +#undef HANDLEREG + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + // If no registers are used, turn this into a copy (nothing if Src == Dst). + if (UsedRegMask == 0) { + if (SrcReg != DstReg) + BuildMI(*MI->getParent(), MI, PPC::OR4, 2, DstReg) + .addReg(SrcReg).addReg(SrcReg); + } else if ((UsedRegMask & 0xFFFF) == UsedRegMask) { + BuildMI(*MI->getParent(), MI, PPC::ORI, 2, DstReg) + .addReg(SrcReg).addImm(UsedRegMask); + } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { + BuildMI(*MI->getParent(), MI, PPC::ORIS, 2, DstReg) + .addReg(SrcReg).addImm(UsedRegMask >> 16); + } else { + BuildMI(*MI->getParent(), MI, PPC::ORIS, 2, DstReg) + .addReg(SrcReg).addImm(UsedRegMask >> 16); + BuildMI(*MI->getParent(), MI, PPC::ORI, 2, DstReg) + .addReg(DstReg).addImm(UsedRegMask & 0xFFFF); + } + + // Remove the old UPDATE_VRSAVE instruction. + MI->getParent()->erase(MI); +} + void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); + // Scan the first few instructions of the prolog, looking for an UPDATE_VRSAVE + // instruction. If we find it, process it. 
+ for (unsigned i = 0; MBBI != MBB.end() && i < 5; ++i, ++MBBI) { + if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + HandleVRSaveUpdate(MBBI, MF.getUsedPhysregs()); + break; + } + } + + // Move MBBI back to the beginning of the entry block. + MBBI = MBB.begin(); + // Get the number of bytes to allocate from the FrameInfo unsigned NumBytes = MFI->getStackSize(); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index f34950054a5..68c73bce9c1 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -152,9 +152,9 @@ def GPRC : RegisterClass<"PPC", [i32], 32, GPRCClass::iterator GPRCClass::allocation_order_end(MachineFunction &MF) const { if (hasFP(MF)) - return end()-4; + return end()-4; // don't allocate R31, R0, R1, LR else - return end()-3; + return end()-3; // don't allocate R0, R1, LR } }]; }