diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 3868f63b3a8..3be1b25b363 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -17,6 +17,7 @@ #include "AMDGPUTargetMachine.h" #include "SIDefines.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" @@ -401,38 +402,21 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); - MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); - unsigned KillFlag = isKill ? RegState::Kill : 0; if (RI.hasVGPRs(RC)) { LLVMContext &Ctx = MF->getFunction()->getContext(); Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!"); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) .addReg(SrcReg); - } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { - unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF); - unsigned TgtReg = MFI->SpillTracker.LaneVGPR; - - BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg) - .addReg(SrcReg, KillFlag) - .addImm(Lane); - MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane); } else if (RI.isSGPRClass(RC)) { // We are only allowed to create one new instruction when spilling - // registers, so we need to use pseudo instruction for vector - // registers. - // - // Reserve a spot in the spill tracker for each sub-register of - // the vector register. 
- unsigned NumSubRegs = RC->getSize() / 4; - unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs); - MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, - FirstLane); - + // registers, so we need to use pseudo instruction for spilling + // SGPRs. unsigned Opcode; switch (RC->getSize() * 8) { + case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; @@ -440,9 +424,10 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, default: llvm_unreachable("Cannot spill register class"); } - BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR) + FrameInfo->setObjectAlignment(FrameIndex, 4); + BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg) - .addImm(FrameIndex); + .addFrameIndex(FrameIndex); } else { llvm_unreachable("VGPR spilling not supported"); } @@ -454,7 +439,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); + MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); if (RI.hasVGPRs(RC)) { @@ -473,38 +458,14 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, default: llvm_unreachable("Cannot spill register class"); } - SIMachineFunctionInfo::SpilledReg Spill = - MFI->SpillTracker.getSpilledReg(FrameIndex); - + FrameInfo->setObjectAlignment(FrameIndex, 4); BuildMI(MBB, MI, DL, get(Opcode), DestReg) - .addReg(Spill.VGPR) - .addImm(FrameIndex); + .addFrameIndex(FrameIndex); } else { llvm_unreachable("VGPR spilling not supported"); } } -static unsigned getNumSubRegsForSpillOp(unsigned Op) { - - switch (Op) { - case AMDGPU::SI_SPILL_S512_SAVE: - case AMDGPU::SI_SPILL_S512_RESTORE: - return 16; - case AMDGPU::SI_SPILL_S256_SAVE: - case 
AMDGPU::SI_SPILL_S256_RESTORE: - return 8; - case AMDGPU::SI_SPILL_S128_SAVE: - case AMDGPU::SI_SPILL_S128_RESTORE: - return 4; - case AMDGPU::SI_SPILL_S64_SAVE: - case AMDGPU::SI_SPILL_S64_RESTORE: - return 2; - case AMDGPU::SI_SPILL_S32_RESTORE: - return 1; - default: llvm_unreachable("Invalid spill opcode"); - } -} - void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, int Count) const { while (Count > 0) { @@ -520,59 +481,11 @@ void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, } bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - SIMachineFunctionInfo *MFI = - MI->getParent()->getParent()->getInfo(); MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MBB.findDebugLoc(MI); switch (MI->getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); - // SGPR register spill - case AMDGPU::SI_SPILL_S512_SAVE: - case AMDGPU::SI_SPILL_S256_SAVE: - case AMDGPU::SI_SPILL_S128_SAVE: - case AMDGPU::SI_SPILL_S64_SAVE: { - unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); - unsigned FrameIndex = MI->getOperand(2).getImm(); - - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - SIMachineFunctionInfo::SpilledReg Spill; - unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(), - &AMDGPU::SGPR_32RegClass, i); - Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); - - BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), - MI->getOperand(0).getReg()) - .addReg(SubReg) - .addImm(Spill.Lane + i); - } - MI->eraseFromParent(); - break; - } - - // SGPR register restore - case AMDGPU::SI_SPILL_S512_RESTORE: - case AMDGPU::SI_SPILL_S256_RESTORE: - case AMDGPU::SI_SPILL_S128_RESTORE: - case AMDGPU::SI_SPILL_S64_RESTORE: - case AMDGPU::SI_SPILL_S32_RESTORE: { - unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); - - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - SIMachineFunctionInfo::SpilledReg Spill; - unsigned FrameIndex = MI->getOperand(2).getImm(); - unsigned SubReg = 
RI.getPhysRegSubReg(MI->getOperand(0).getReg(), - &AMDGPU::SGPR_32RegClass, i); - Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); - - BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg) - .addReg(MI->getOperand(1).getReg()) - .addImm(Spill.Lane + i); - } - insertNOPs(MI, 3); - MI->eraseFromParent(); - break; - } case AMDGPU::SI_CONSTDATA_PTR: { unsigned Reg = MI->getOperand(0).getReg(); unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index fd011a16067..01db459e3f2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1729,14 +1729,14 @@ def V_SUB_F64 : InstSI < multiclass SI_SPILL_SGPR { def _SAVE : InstSI < - (outs VReg_32:$dst), + (outs), (ins sgpr_class:$src, i32imm:$frame_idx), "", [] >; def _RESTORE : InstSI < (outs sgpr_class:$dst), - (ins VReg_32:$src, i32imm:$frame_idx), + (ins i32imm:$frame_idx), "", [] >; diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp index c53a7e10d54..086f0908703 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -12,6 +12,7 @@ #include "SIMachineFunctionInfo.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -90,8 +91,49 @@ void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex, int Lane) { SpilledRegisters[FrameIndex] = SpilledReg(Reg, Lane); } +/// \brief Returns a register that is not used at any point in the function. +/// If all registers are used, then this function will return +/// AMDGPU::NoRegister. 
+static unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) { -const SIMachineFunctionInfo::SpilledReg& -SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) { - return SpilledRegisters[FrameIndex]; + const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass; + + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) { + if (!MRI.isPhysRegUsed(*I)) + return *I; + } + return AMDGPU::NoRegister; +} + +SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( + MachineFunction *MF, + unsigned FrameIndex, + unsigned SubIdx) { + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + int64_t Offset = FrameInfo->getObjectOffset(FrameIndex); + Offset += SubIdx * 4; + + unsigned LaneVGPRIdx = Offset / (64 * 4); + unsigned Lane = (Offset / 4) % 64; + + struct SpilledReg Spill; + + if (!LaneVGPRs.count(LaneVGPRIdx)) { + unsigned LaneVGPR = findUnusedVGPR(MRI); + LaneVGPRs[LaneVGPRIdx] = LaneVGPR; + MRI.setPhysRegUsed(LaneVGPR); + + // Add this register as live-in to all blocks to avoid machine verifier + complaining about use of an undefined physical register. 
+ for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); + BI != BE; ++BI) { + BI->addLiveIn(LaneVGPR); + } + } + + Spill.VGPR = LaneVGPRs[LaneVGPRIdx]; + Spill.Lane = Lane; + return Spill; } diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 707c41aba71..fcaa23a619b 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -57,9 +57,12 @@ public: // SIMachineFunctionInfo definition SIMachineFunctionInfo(const MachineFunction &MF); + SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex, + unsigned SubIdx); unsigned PSInputAddr; struct RegSpillTracker SpillTracker; unsigned NumUserSGPRs; + std::map LaneVGPRs; }; } // End namespace llvm diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 531c5047f50..8663df88922 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -20,6 +20,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" using namespace llvm; @@ -43,23 +45,110 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const return Fn.getFrameInfo()->hasStackObjects(); } +static unsigned getNumSubRegsForSpillOp(unsigned Op) { + + switch (Op) { + case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S512_RESTORE: + return 16; + case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S256_RESTORE: + return 8; + case AMDGPU::SI_SPILL_S128_SAVE: + case AMDGPU::SI_SPILL_S128_RESTORE: + return 4; + case AMDGPU::SI_SPILL_S64_SAVE: + case AMDGPU::SI_SPILL_S64_RESTORE: + return 2; + case AMDGPU::SI_SPILL_S32_SAVE: + case AMDGPU::SI_SPILL_S32_RESTORE: + return 1; + default: llvm_unreachable("Invalid spill opcode"); + } +} + void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, 
unsigned FIOperandNum, RegScavenger *RS) const { MachineFunction *MF = MI->getParent()->getParent(); + MachineBasicBlock *MBB = MI->getParent(); + SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); const SIInstrInfo *TII = static_cast(ST.getInstrInfo()); + DebugLoc DL = MI->getDebugLoc(); + MachineOperand &FIOp = MI->getOperand(FIOperandNum); int Index = MI->getOperand(FIOperandNum).getIndex(); - int64_t Offset = FrameInfo->getObjectOffset(Index); - FIOp.ChangeToImmediate(Offset); - if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) { - unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI, SPAdj); - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) - .addImm(Offset); - FIOp.ChangeToRegister(TmpReg, false); + switch (MI->getOpcode()) { + // SGPR register spill + case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S128_SAVE: + case AMDGPU::SI_SPILL_S64_SAVE: + case AMDGPU::SI_SPILL_S32_SAVE: { + unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); + + for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { + unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(), + &AMDGPU::SGPR_32RegClass, i); + struct SIMachineFunctionInfo::SpilledReg Spill = + MFI->getSpilledReg(MF, Index, i); + + if (Spill.VGPR == AMDGPU::NoRegister) { + LLVMContext &Ctx = MF->getFunction()->getContext(); + Ctx.emitError("Ran out of VGPRs for spilling SGPR"); + } + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) + .addReg(SubReg) + .addImm(Spill.Lane); + + } + MI->eraseFromParent(); + break; + } + + // SGPR register restore + case AMDGPU::SI_SPILL_S512_RESTORE: + case AMDGPU::SI_SPILL_S256_RESTORE: + case AMDGPU::SI_SPILL_S128_RESTORE: + case AMDGPU::SI_SPILL_S64_RESTORE: + case AMDGPU::SI_SPILL_S32_RESTORE: { + unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); + + for (unsigned i = 0, e = NumSubRegs; i < e; 
++i) { + unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(), + &AMDGPU::SGPR_32RegClass, i); + struct SIMachineFunctionInfo::SpilledReg Spill = + MFI->getSpilledReg(MF, Index, i); + + if (Spill.VGPR == AMDGPU::NoRegister) { + LLVMContext &Ctx = MF->getFunction()->getContext(); + Ctx.emitError("Ran out of VGPRs for spilling SGPR"); + } + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane); + + } + TII->insertNOPs(MI, 3); + MI->eraseFromParent(); + break; + } + + default: { + int64_t Offset = FrameInfo->getObjectOffset(Index); + FIOp.ChangeToImmediate(Offset); + if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) { + unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI, SPAdj); + BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + .addImm(Offset); + FIOp.ChangeToRegister(TmpReg, false); + } + } } }