diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 5d08b91ea7b..454b7c2d55f 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -187,27 +187,45 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
   DebugLoc DL = MBB.findDebugLoc(MI);
   unsigned KillFlag = isKill ? RegState::Kill : 0;
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 
   if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
-    unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
-    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
-            MFI->SpillTracker.LaneVGPR)
+    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
+
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
             .addReg(SrcReg, KillFlag)
             .addImm(Lane);
+    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
+  } else if (RI.isSGPRClass(RC)) {
+    // We are only allowed to create one new instruction when spilling
+    // registers, so we need to use a pseudo instruction for vector
+    // registers.
+    //
+    // Reserve a spot in the spill tracker for each sub-register of
+    // the vector register.
+    unsigned NumSubRegs = RC->getSize() / 4;
+    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
+                                                        NumSubRegs);
     MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
-                                    Lane);
-  } else {
-    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
-      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
-              .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
-      storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
-                          &AMDGPU::SReg_32RegClass, TRI);
+                                    FirstLane);
+
+    unsigned Opcode;
+    switch (RC->getSize() * 8) {
+    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
+    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
+    default: llvm_unreachable("Cannot spill register class");
     }
+
+    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
+            .addReg(SrcReg)
+            .addImm(FrameIndex);
+  } else {
+    llvm_unreachable("VGPR spilling not supported");
   }
 }
 
@@ -216,32 +234,127 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        unsigned DestReg, int FrameIndex,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
   DebugLoc DL = MBB.findDebugLoc(MI);
   if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
-    SIMachineFunctionInfo::SpilledReg Spill =
+    SIMachineFunctionInfo::SpilledReg Spill =
         MFI->SpillTracker.getSpilledReg(FrameIndex);
     assert(Spill.VGPR);
     BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
             .addReg(Spill.VGPR)
             .addImm(Spill.Lane);
-  } else {
-    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
-      unsigned Flags = RegState::Define;
-      if (i == 0) {
-        Flags |= RegState::Undef;
-      }
-      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
-                           &AMDGPU::SReg_32RegClass, TRI);
-      BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
-              .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
-              .addReg(SubReg);
+    insertNOPs(MI, 3);
+  } else if (RI.isSGPRClass(RC)) {
+    unsigned Opcode;
+    switch (RC->getSize() * 8) {
+    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
+    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
+    default: llvm_unreachable("Cannot spill register class");
     }
+
+    SIMachineFunctionInfo::SpilledReg Spill =
+        MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+            .addReg(Spill.VGPR)
+            .addImm(FrameIndex);
+    insertNOPs(MI, 3);
+  } else {
+    llvm_unreachable("VGPR spilling not supported");
   }
 }
 
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+  switch (Op) {
+  case AMDGPU::SI_SPILL_S512_SAVE:
+  case AMDGPU::SI_SPILL_S512_RESTORE:
+    return 16;
+  case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S256_RESTORE:
+    return 8;
+  case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S128_RESTORE:
+    return 4;
+  case AMDGPU::SI_SPILL_S64_SAVE:
+  case AMDGPU::SI_SPILL_S64_RESTORE:
+    return 2;
+  default: llvm_unreachable("Invalid spill opcode");
+  }
+}
+
+void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
+                             int Count) const {
+  while (Count > 0) {
+    int Arg;
+    if (Count >= 8)
+      Arg = 7;
+    else
+      Arg = Count - 1;
+    Count -= 8;
+    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
+            .addImm(Arg);
+  }
+}
+
+bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+  SIMachineFunctionInfo *MFI =
+      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  switch (MI->getOpcode()) {
+  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+
+  // SGPR register spill
+  case AMDGPU::SI_SPILL_S512_SAVE:
+  case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S64_SAVE: {
+    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+    unsigned FrameIndex = MI->getOperand(2).getImm();
+
+    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+      SIMachineFunctionInfo::SpilledReg Spill;
+      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
+                                            &AMDGPU::SGPR_32RegClass, i);
+      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
+              MI->getOperand(0).getReg())
+              .addReg(SubReg)
+              .addImm(Spill.Lane + i);
+    }
+    MI->eraseFromParent();
+    break;
+  }
+
+  // SGPR register restore
+  case AMDGPU::SI_SPILL_S512_RESTORE:
+  case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S128_RESTORE:
+  case AMDGPU::SI_SPILL_S64_RESTORE: {
+    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+      SIMachineFunctionInfo::SpilledReg Spill;
+      unsigned FrameIndex = MI->getOperand(2).getImm();
+      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
+                                            &AMDGPU::SGPR_32RegClass, i);
+      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
+              .addReg(MI->getOperand(1).getReg())
+              .addImm(Spill.Lane + i);
+    }
+    MI->eraseFromParent();
+    break;
+  }
+  }
+  return true;
+}
+
 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                               bool NewMI) const {
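The restore path pads each reload with insertNOPs(MI, 3); V_READLANE_B32 writes an SGPR from the VALU, and subsequent instructions presumably may not read that SGPR until the hardware's wait states have elapsed. The chunking logic of insertNOPs can be modeled standalone as below; the sketch assumes, as the SI ISA describes for S_NOP, that the immediate encodes "wait N+1 cycles", so one S_NOP covers at most 8 NOPs. The function name is illustrative, not part of the patch.

#include <cstdio>
#include <vector>

// Returns the immediates of the S_NOP instructions emitted for a request
// of `count` NOP cycles, mirroring SIInstrInfo::insertNOPs above.
std::vector<int> nopImmediates(int count) {
  std::vector<int> imms;
  while (count > 0) {
    // Each S_NOP covers min(count, 8) cycles; the encoding is count - 1.
    imms.push_back(count >= 8 ? 7 : count - 1);
    count -= 8;
  }
  return imms;
}

int main() {
  for (int imm : nopImmediates(3))   // one S_NOP with immediate 2
    printf("S_NOP %d\n", imm);
  for (int imm : nopImmediates(10))  // S_NOP 7 (8 cycles) + S_NOP 1 (2 cycles)
    printf("S_NOP %d\n", imm);
  return 0;
}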
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 63f1d7fdee8..d7992742216 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -73,6 +73,8 @@ public:
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const override;
 
+  virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
   unsigned commuteOpcode(unsigned Opcode) const;
 
   MachineInstr *commuteInstruction(MachineInstr *MI,
@@ -165,6 +167,8 @@ public:
   void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
               unsigned SavReg, unsigned IndexReg) const;
+
+  void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
 };
 
 namespace AMDGPU {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 27e7abe1a38..b93de36ddf8 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -369,7 +369,7 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
 
 let Predicates = [isSI] in {
 
-//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+def S_NOP : SOPP <0x00000000, (ins i16imm:$SIMM16), "S_NOP $SIMM16", []>;
 
 let isTerminator = 1 in {
 
@@ -1574,6 +1574,27 @@ def V_SUB_F64 : InstSI <
 
 } // end usesCustomInserter
 
+multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
+
+  def _SAVE : InstSI <
+    (outs VReg_32:$dst),
+    (ins sgpr_class:$src, i32imm:$frame_idx),
+    "", []
+  >;
+
+  def _RESTORE : InstSI <
+    (outs sgpr_class:$dst),
+    (ins VReg_32:$src, i32imm:$frame_idx),
+    "", []
+  >;
+
+}
+
+defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
+defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
+defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+
 } // end IsCodeGenOnly, isPseudo
 
 def : Pat<
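The _SAVE and _RESTORE pseudos defined above carry the lane VGPR and the frame index through register allocation; expandPostRAPseudo then unrolls each one into a V_WRITELANE_B32 or V_READLANE_B32 per 32-bit sub-register. A standalone sketch of the save-side unrolling, with a hypothetical enum standing in for LLVM's generated opcode numbers:

#include <cassert>
#include <cstdio>

enum Opcode { SI_SPILL_S64_SAVE, SI_SPILL_S128_SAVE,
              SI_SPILL_S256_SAVE, SI_SPILL_S512_SAVE };

// Mirrors getNumSubRegsForSpillOp: the dword count of each pseudo.
unsigned numSubRegs(Opcode Op) {
  switch (Op) {
  case SI_SPILL_S64_SAVE:  return 2;
  case SI_SPILL_S128_SAVE: return 4;
  case SI_SPILL_S256_SAVE: return 8;
  case SI_SPILL_S512_SAVE: return 16;
  }
  assert(0 && "invalid spill opcode");
  return 0;
}

// Prints the V_WRITELANE_B32 sequence for a tuple whose first 32-bit
// register has hardware index FirstSGPR, spilled starting at FirstLane.
void expandSave(Opcode Op, unsigned FirstSGPR, unsigned FirstLane) {
  for (unsigned i = 0, e = numSubRegs(Op); i != e; ++i)
    printf("V_WRITELANE_B32 v_spill, s%u, %u\n", FirstSGPR + i, FirstLane + i);
}

int main() {
  // Spilling the 128-bit tuple s[8:11] into lanes 12..15 of the lane VGPR.
  expandSave(SI_SPILL_S128_SAVE, 8, 12);
  return 0;
}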
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index ea04346e509..af609958129 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,8 +10,11 @@
 
 #include "SIMachineFunctionInfo.h"
+#include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
 
 #define MAX_LANES 64
 
@@ -26,21 +29,57 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     PSInputAddr(0),
     SpillTracker() { }
 
-static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
-  return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-}
+static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
+  unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
 
-unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
-  if (!LaneVGPR) {
-    LaneVGPR = createLaneVGPR(MRI);
-  } else {
-    CurrentLane++;
-    if (CurrentLane == MAX_LANES) {
-      CurrentLane = 0;
-      LaneVGPR = createLaneVGPR(MRI);
+  // We need to add this register as live out for the function, in order to
+  // have the live range calculated directly.
+  //
+  // When register spilling begins, we have already calculated the live
+  // intervals for all the registers.  Since we are spilling SGPRs to
+  // VGPRs, we need to update the Lane VGPR's live interval every time we
+  // spill or restore a register.
+  //
+  // Unfortunately, there is no good way to update the live interval as
+  // the TargetInstrInfo callbacks for spilling and restoring don't give
+  // us access to the live interval information.
+  //
+  // We are lucky, though, because the InlineSpiller calls
+  // LiveRangeEdit::calculateRegClassAndHint(), which iterates through
+  // all the new registers that have been created when restoring a register
+  // and calls LiveIntervals::getInterval(), which creates and computes
+  // the live interval for the newly created register.  However, once this
+  // live interval is created, it doesn't change, and since we usually reuse
+  // the Lane VGPR multiple times, this means any uses after the first aren't
+  // added to the live interval.
+  //
+  // To work around this, we add Lane VGPRs to the function's live-out list,
+  // so that we can guarantee their live ranges will cover all of their uses.
+
+  for (MachineBasicBlock &MBB : *MF) {
+    if (MBB.back().getOpcode() == AMDGPU::S_ENDPGM) {
+      MBB.back().addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true));
+      return VGPR;
     }
   }
-  return CurrentLane;
+
+  MF->getFunction()->getContext().emitError(
+      "Could not find S_ENDPGM instruction.");
+
+  return VGPR;
+}
+
+unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes(
+    MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) {
+  unsigned StartLane = CurrentLane;
+  CurrentLane += NumRegs;
+  if (!LaneVGPR) {
+    LaneVGPR = createLaneVGPR(MRI, MF);
+  } else {
+    if (CurrentLane >= MAX_LANES) {
+      StartLane = CurrentLane = 0;
+      LaneVGPR = createLaneVGPR(MRI, MF);
+    }
+  }
+  return StartLane;
 }
 
 void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index ef38270d51a..96e619bde8d 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -43,7 +43,12 @@ public:
 public:
   unsigned LaneVGPR;
   RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
-  unsigned getNextLane(MachineRegisterInfo &MRI);
+  /// \p NumRegs The number of consecutive registers that need to be spilled.
+  ///            This function will ensure that all registers are stored in
+  ///            the same VGPR.
+  /// \returns The lane to be used for storing the first register.
+  unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF,
+                        unsigned NumRegs = 1);
   void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
   const SpilledReg& getSpilledReg(unsigned FrameIndex);
   bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
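reserveLanes() hands out the 64 lanes of the current lane VGPR bump-allocator style; when a request would run past MAX_LANES, it restarts at lane 0 in a freshly created VGPR so that every sub-register of one spilled value stays in the same VGPR. A standalone model of that bookkeeping (names are illustrative; integer ids stand in for the virtual registers created by createLaneVGPR):

#include <cstdio>

const unsigned MAX_LANES = 64;

struct LaneTracker {
  unsigned CurrentLane = 0;
  unsigned LaneVGPR = 0;  // 0 = no lane VGPR allocated yet
  unsigned NextVGPR = 1;  // stand-in for createVirtualRegister()

  unsigned reserveLanes(unsigned NumRegs = 1) {
    unsigned StartLane = CurrentLane;
    CurrentLane += NumRegs;
    if (!LaneVGPR) {
      LaneVGPR = NextVGPR++;
    } else if (CurrentLane >= MAX_LANES) {
      // Out of lanes: restart the allocation in a new VGPR.
      StartLane = CurrentLane = 0;
      LaneVGPR = NextVGPR++;
    }
    return StartLane;
  }
};

int main() {
  LaneTracker T;
  printf("%u\n", T.reserveLanes(16));  // lanes 0..15 of VGPR 1
  printf("%u\n", T.reserveLanes(16));  // lanes 16..31 of VGPR 1
  printf("%u\n", T.reserveLanes(60));  // would cross lane 64: VGPR 2, lane 0
  return 0;
}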
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 5897fbca94c..8dc9a05799c 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -129,3 +129,10 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
     return &AMDGPU::VGPR_32RegClass;
   }
 }
+
+unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
+                                          const TargetRegisterClass *SubRC,
+                                          unsigned Channel) const {
+  unsigned Index = getHWRegIndex(Reg);
+  return SubRC->getRegister(Index + Channel);
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 54717c184bc..36b4fcd32a8 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -63,6 +63,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
   /// be returned.
   const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
                                             unsigned SubIdx) const;
+
+  /// \p Channel This is the register channel (e.g. a value from 0-16), not
+  ///            the SubReg index.
+  /// \returns The sub-register of \p Reg that is in \p Channel.
+  unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
+                            unsigned Channel) const;
 };
 
 } // End namespace llvm
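Because getHWRegIndex() returns the hardware index of a register's first 32-bit component, selecting the sub-register in a given channel is plain index arithmetic into the 32-bit class. A minimal model, assuming a register file where the 32-bit register with hardware index i is simply "SGPRi" (the real implementation looks the result up via SubRC->getRegister()):

#include <cstdio>

// Hypothetical stand-in for looking up register i in the SGPR_32 class.
unsigned sgpr32(unsigned Index) { return Index; }

// Mirrors SIRegisterInfo::getPhysRegSubReg: Channel is a raw channel
// number (0, 1, 2, ...), not a TableGen sub-register index.
unsigned getPhysRegSubReg(unsigned TupleHWIndex, unsigned Channel) {
  return sgpr32(TupleHWIndex + Channel);
}

int main() {
  // Channel 3 of the tuple starting at hardware index 4 is SGPR7; the
  // restore loop uses this to pick each V_READLANE_B32 destination.
  printf("SGPR%u\n", getPhysRegSubReg(4, 3));
  return 0;
}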